From 8b2b2dc34f08461b48778a4c0be14fa028f1636d Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Sun, 26 Jan 2025 11:01:10 -0500
Subject: [PATCH 01/12] fix: remove legacy code for GoogleClient and fix model
 parameters for GenAI

---
 api/app/clients/GoogleClient.js               | 62 ++++---------
 api/server/services/Endpoints/google/build.js |  2 +
 package-lock.json                             |  2 +-
 packages/data-provider/package.json           |  2 +-
 packages/data-provider/src/schemas.ts         | 19 ++++++
 5 files changed, 36 insertions(+), 51 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 8fce279bf14..c1d14d7afcd 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -7,6 +7,7 @@ const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
 const { AIMessage, HumanMessage, SystemMessage } = require('@langchain/core/messages');
 const {
+  googleGenConfigSchema,
   validateVisionModel,
   getResponseSender,
   endpointSettings,
@@ -134,7 +135,6 @@ class GoogleClient extends BaseClient {
     const { isChatModel } = this;
     this.isTextModel =
       !isGenerativeModel && !isChatModel && /code|text/.test(this.modelOptions.model);
-    const { isTextModel } = this;
 
     this.maxContextTokens =
       this.options.maxContextTokens ??
@@ -170,34 +170,6 @@ class GoogleClient extends BaseClient {
     this.userLabel = this.options.userLabel || 'User';
     this.modelLabel = this.options.modelLabel || 'Assistant';
 
-    if (isChatModel || isGenerativeModel) {
-      // Use these faux tokens to help the AI understand the context since we are building the chat log ourselves.
-      // Trying to use "<|im_start|>" causes the AI to still generate "<" or "<|" at the end sometimes for some reason,
-      // without tripping the stop sequences, so I'm using "||>" instead.
-      this.startToken = '||>';
-      this.endToken = '';
-    } else if (isTextModel) {
-      this.startToken = '||>';
-      this.endToken = '';
-    } else {
-      // Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
-      // system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
-      // as a single token. So we're using this instead.
-      this.startToken = '||>';
-      this.endToken = '';
-    }
-
-    if (!this.modelOptions.stop) {
-      const stopTokens = [this.startToken];
-      if (this.endToken && this.endToken !== this.startToken) {
-        stopTokens.push(this.endToken);
-      }
-      stopTokens.push(`\n${this.userLabel}:`);
-      stopTokens.push('<|diff_marker|>');
-      // I chose not to do one for `modelLabel` because I've never seen it happen
-      this.modelOptions.stop = stopTokens;
-    }
-
     if (this.options.reverseProxyUrl) {
       this.completionsUrl = this.options.reverseProxyUrl;
     } else {
@@ -447,13 +419,6 @@ class GoogleClient extends BaseClient {
     if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
       promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
     }
-    if (promptPrefix) {
-      // If the prompt prefix doesn't end with the end token, add it.
-      if (!promptPrefix.endsWith(`${this.endToken}`)) {
-        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
-      }
-      promptPrefix = `\nContext:\n${promptPrefix}`;
-    }
 
     if (identityPrefix) {
       promptPrefix = `${identityPrefix}${promptPrefix}`;
@@ -490,7 +455,7 @@ class GoogleClient extends BaseClient {
         isCreatedByUser || !isEdited ? `\n\n${message.author}:` : `${promptPrefix}\n\n${message.author}:`;
-      const messageString = `${messagePrefix}\n${message.content}${this.endToken}\n`;
+      const messageString = `${messagePrefix}\n${message.content}\n`;
       let newPromptBody = `${messageString}${promptBody}`;
 
       context.unshift(message);
@@ -613,7 +578,7 @@ class GoogleClient extends BaseClient {
       return new ChatVertexAI(clientOptions);
     } else if (!EXCLUDED_GENAI_MODELS.test(model)) {
       logger.debug('Creating GenAI client');
-      return new GenAI(this.apiKey).getGenerativeModel({ ...clientOptions, model }, requestOptions);
+      return new GenAI(this.apiKey).getGenerativeModel({ model }, requestOptions);
     }
 
     logger.debug('Creating Chat Google Generative AI client');
@@ -623,6 +588,7 @@ class GoogleClient extends BaseClient {
   async getCompletion(_payload, options = {}) {
     const { parameters, instances } = _payload;
     const { onProgress, abortController } = options;
+    const safetySettings = this.getSafetySettings();
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
 
@@ -676,9 +642,13 @@ class GoogleClient extends BaseClient {
     const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
 
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
+      /** @type {GenAI} */
      const client = model;
+      /** @type {import('@google/generative-ai').GenerateContentRequest} */
       const requestOptions = {
+        safetySettings,
         contents: _payload,
+        generationConfig: googleGenConfigSchema.parse(clientOptions),
       };
 
       let promptPrefix = (this.options.promptPrefix ?? '').trim();
@@ -696,8 +666,6 @@ class GoogleClient extends BaseClient {
         };
       }
 
-      requestOptions.safetySettings = _payload.safetySettings;
-
       const delay = modelName.includes('flash') ? 8 : 15;
       const result = await client.generateContentStream(requestOptions);
       for await (const chunk of result.stream) {
@@ -713,7 +681,7 @@ class GoogleClient extends BaseClient {
 
     const stream = await model.stream(messages, {
       signal: abortController.signal,
-      safetySettings: _payload.safetySettings,
+      safetySettings,
     });
 
     let delay = this.options.streamRate || 8;
@@ -744,6 +712,7 @@ class GoogleClient extends BaseClient {
   async titleChatCompletion(_payload, options = {}) {
     const { abortController } = options;
     const { parameters, instances } = _payload;
+    const safetySettings = this.getSafetySettings();
     const { messages: _messages, examples: _examples } = instances?.[0] ?? {};
 
     let clientOptions = { ...parameters, maxRetries: 2 };
@@ -780,6 +749,8 @@ class GoogleClient extends BaseClient {
       const client = model;
       const requestOptions = {
         contents: _payload,
+        safetySettings,
+        generationConfig: googleGenConfigSchema.parse(clientOptions),
       };
 
       let promptPrefix = (this.options.promptPrefix ?? '').trim();
@@ -797,9 +768,6 @@ class GoogleClient extends BaseClient {
         };
       }
 
-      const safetySettings = _payload.safetySettings;
-      requestOptions.safetySettings = safetySettings;
-
       const result = await client.generateContent(requestOptions);
       reply = result.response?.text();
 
@@ -807,12 +775,10 @@ class GoogleClient extends BaseClient {
       return reply;
     } else {
       logger.debug('Beginning titling');
-      const safetySettings = _payload.safetySettings;
-
       const titleResponse = await model.invoke(messages, {
         signal: abortController.signal,
         timeout: 7000,
-        safetySettings: safetySettings,
+        safetySettings,
       });
 
       reply = titleResponse.content;
@@ -878,8 +844,6 @@ class GoogleClient extends BaseClient {
   }
 
   async sendCompletion(payload, opts = {}) {
-    payload.safetySettings = this.getSafetySettings();
-
     let reply = '';
     reply = await this.getCompletion(payload, opts);
     return reply.trim();
diff --git a/api/server/services/Endpoints/google/build.js b/api/server/services/Endpoints/google/build.js
index 45f11940ed9..11b048694f5 100644
--- a/api/server/services/Endpoints/google/build.js
+++ b/api/server/services/Endpoints/google/build.js
@@ -11,6 +11,7 @@ const buildOptions = (endpoint, parsedBody) => {
     greeting,
     spec,
     artifacts,
+    maxContextTokens,
     ...modelOptions
   } = parsedBody;
   const endpointOption = removeNullishValues({
@@ -22,6 +23,7 @@ const buildOptions = (endpoint, parsedBody) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     modelOptions,
   });
 
diff --git a/package-lock.json b/package-lock.json
index 4e1db7c3f5a..0b2c165e372 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -35030,7 +35030,7 @@
     },
     "packages/data-provider": {
       "name": "librechat-data-provider",
-      "version": "0.7.694",
+      "version": "0.7.695",
       "license": "ISC",
       "dependencies": {
         "axios": "^1.7.7",
diff --git a/packages/data-provider/package.json b/packages/data-provider/package.json
index 224b286a261..107504b5c8d 100644
--- a/packages/data-provider/package.json
+++ b/packages/data-provider/package.json
@@ -1,6 +1,6 @@
 {
   "name": "librechat-data-provider",
-  "version": "0.7.694",
+  "version": "0.7.695",
   "description": "data services for librechat apps",
   "main": "dist/index.js",
   "module": "dist/index.es.js",
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index b20691064a2..8e54e377c31 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -788,6 +788,25 @@ export const googleSchema = tConversationSchema
     maxContextTokens: undefined,
   }));
 
+/**
+ * TODO: Map the following fields:
+ - presence_penalty -> presencePenalty
+ - frequency_penalty -> frequencyPenalty
+ - stop -> stopSequences
+ */
+export const googleGenConfigSchema = z
+  .object({
+    maxOutputTokens: coerceNumber.optional(),
+    temperature: coerceNumber.optional(),
+    topP: coerceNumber.optional(),
+    topK: coerceNumber.optional(),
+    presencePenalty: coerceNumber.optional(),
+    frequencyPenalty: coerceNumber.optional(),
+    stopSequences: z.array(z.string()).optional(),
+  })
+  .strip()
+  .optional();
+
 export const bingAISchema = tConversationSchema
   .pick({
     jailbreak: true,

From 88d475b92e5fb7ea0d4823552919c14b6f9f6ea4 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Sun, 26 Jan 2025 18:28:10 -0500
Subject: [PATCH 02/12] refactor: streamline client init logic

---
 api/app/clients/GoogleClient.js | 91 ++++++++-------------------
 1 file changed, 22 insertions(+), 69 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index c1d14d7afcd..d62d69c5b98 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -5,7 +5,7 @@ const { GoogleVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
-const { AIMessage, HumanMessage, SystemMessage } = require('@langchain/core/messages');
+const { HumanMessage, SystemMessage } = require('@langchain/core/messages');
 const {
   googleGenConfigSchema,
   validateVisionModel,
@@ -176,6 +176,7 @@ class GoogleClient extends BaseClient {
       this.completionsUrl = this.constructUrl();
     }
 
+    this.initializeClient();
     return this;
   }
 
@@ -308,7 +309,6 @@ class GoogleClient extends BaseClient {
           messages: [new HumanMessage(formatMessage({ message: latestMessage }))],
         },
       ],
-      parameters: this.modelOptions,
     };
     return { prompt: payload };
   }
@@ -352,7 +352,6 @@ class GoogleClient extends BaseClient {
             .map((message) => formatMessage({ message, langChain: true })),
         },
       ],
-      parameters: this.modelOptions,
     };
 
     let promptPrefix = (this.options.promptPrefix ?? '').trim();
@@ -585,16 +584,8 @@ class GoogleClient extends BaseClient {
     return new ChatGoogleGenerativeAI({ ...clientOptions, apiKey: this.apiKey });
   }
 
-  async getCompletion(_payload, options = {}) {
-    const { parameters, instances } = _payload;
-    const { onProgress, abortController } = options;
-    const safetySettings = this.getSafetySettings();
-    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
-    const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
-
-    let examples;
-
-    let clientOptions = { ...parameters, maxRetries: 2 };
+  initializeClient() {
+    let clientOptions = { ...this.modelOptions, maxRetries: 2 };
 
     if (this.project_id) {
       clientOptions['authOptions'] = {
@@ -605,33 +596,21 @@ class GoogleClient extends BaseClient {
       };
     }
 
-    if (!parameters) {
-      clientOptions = { ...clientOptions, ...this.modelOptions };
-    }
-
     if (this.isGenerativeModel && !this.project_id) {
       clientOptions.modelName = clientOptions.model;
       delete clientOptions.model;
     }
 
-    if (_examples && _examples.length) {
-      examples = _examples
-        .map((ex) => {
-          const { input, output } = ex;
-          if (!input || !output) {
-            return undefined;
-          }
-          return {
-            input: new HumanMessage(input.content),
-            output: new AIMessage(output.content),
-          };
-        })
-        .filter((ex) => ex);
-
-      clientOptions.examples = examples;
-    }
+    this.client = this.createLLM(clientOptions);
+  }
 
-    const model = this.createLLM(clientOptions);
+  async getCompletion(_payload, options = {}) {
+    const { instances } = _payload;
+    const safetySettings = this.getSafetySettings();
+    const { onProgress, abortController } = options;
+    const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
+    const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
+    const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
 
     let reply = '';
     const messages = this.isTextModel ? _payload.trim() : _messages;
@@ -640,15 +619,14 @@ class GoogleClient extends BaseClient {
       messages.unshift(new SystemMessage(context));
     }
 
-    const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
       /** @type {GenAI} */
-      const client = model;
+      const client = this.client;
       /** @type {import('@google/generative-ai').GenerateContentRequest} */
       const requestOptions = {
         safetySettings,
         contents: _payload,
-        generationConfig: googleGenConfigSchema.parse(clientOptions),
+        generationConfig: googleGenConfigSchema.parse(this.modelOptions),
       };
 
       let promptPrefix = (this.options.promptPrefix ?? '').trim();
@@ -679,7 +657,7 @@ class GoogleClient extends BaseClient {
       return reply;
     }
 
-    const stream = await model.stream(messages, {
+    const stream = await this.client.stream(messages, {
       signal: abortController.signal,
       safetySettings,
     });
@@ -711,46 +689,22 @@ class GoogleClient extends BaseClient {
    */
   async titleChatCompletion(_payload, options = {}) {
     const { abortController } = options;
-    const { parameters, instances } = _payload;
+    const { instances } = _payload;
     const safetySettings = this.getSafetySettings();
     const { messages: _messages, examples: _examples } = instances?.[0] ?? {};
 
-    let clientOptions = { ...parameters, maxRetries: 2 };
-
-    logger.debug('Initialized title client options');
-
-    if (this.project_id) {
-      clientOptions['authOptions'] = {
-        credentials: {
-          ...this.serviceKey,
-        },
-        projectId: this.project_id,
-      };
-    }
-
-    if (!parameters) {
-      clientOptions = { ...clientOptions, ...this.modelOptions };
-    }
-
-    if (this.isGenerativeModel && !this.project_id) {
-      clientOptions.modelName = clientOptions.model;
-      delete clientOptions.model;
-    }
-
-    const model = this.createLLM(clientOptions);
-
     let reply = '';
     const messages = this.isTextModel ? _payload.trim() : _messages;
 
-    const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
+    const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
       logger.debug('Identified titling model as GenAI version');
       /** @type {GenerativeModel} */
-      const client = model;
+      const client = this.client;
       const requestOptions = {
         contents: _payload,
         safetySettings,
-        generationConfig: googleGenConfigSchema.parse(clientOptions),
+        generationConfig: googleGenConfigSchema.parse(this.modelOptions),
       };
 
       let promptPrefix = (this.options.promptPrefix ?? '').trim();
@@ -775,7 +729,7 @@ class GoogleClient extends BaseClient {
       return reply;
     } else {
       logger.debug('Beginning titling');
-      const titleResponse = await model.invoke(messages, {
+      const titleResponse = await this.client.invoke(messages, {
         signal: abortController.signal,
         timeout: 7000,
         safetySettings,
@@ -810,8 +764,7 @@ class GoogleClient extends BaseClient {
       logger.warn(
         `Current vision model does not support titling without an attachment; falling back to default model ${settings.model.default}`,
       );
-
-      payload.parameters = { ...payload.parameters, model: settings.model.default };
+      this.modelOptions.model = settings.model.default;
     }
 
     try {

From 74fd3458691db7cdc6cdc9aa7e1e2c7c6ddd0783 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Sun, 26 Jan 2025 21:41:03 -0500
Subject: [PATCH 03/12] refactor: remove legacy vertex clients, WIP remote
 vertex token count

---
 api/app/clients/GoogleClient.js | 38 ++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index d62d69c5b98..f476358b133 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -1,8 +1,6 @@
 const { google } = require('googleapis');
 const { Agent, ProxyAgent } = require('undici');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
-const { GoogleVertexAI } = require('@langchain/google-vertexai');
-const { ChatGoogleVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
 const { HumanMessage, SystemMessage } = require('@langchain/core/messages');
@@ -50,9 +48,10 @@ class GoogleClient extends BaseClient {
     const serviceKey = creds[AuthKeys.GOOGLE_SERVICE_KEY] ?? {};
     this.serviceKey =
       serviceKey && typeof serviceKey === 'string' ? JSON.parse(serviceKey) : serviceKey ?? {};
+    /** @type {string | null | undefined} */
+    this.project_id = this.serviceKey.project_id;
     this.client_email = this.serviceKey.client_email;
     this.private_key = this.serviceKey.private_key;
-    this.project_id = this.serviceKey.project_id;
     this.access_token = null;
 
     this.apiKey = creds[AuthKeys.GOOGLE_API_KEY];
@@ -566,13 +565,7 @@ class GoogleClient extends BaseClient {
       }
     }
 
-    if (this.project_id && this.isTextModel) {
-      logger.debug('Creating Google VertexAI client');
-      return new GoogleVertexAI(clientOptions);
-    } else if (this.project_id && this.isChatModel) {
-      logger.debug('Creating Chat Google VertexAI client');
-      return new ChatGoogleVertexAI(clientOptions);
-    } else if (this.project_id) {
+    if (this.project_id != null) {
       logger.debug('Creating VertexAI client');
       return new ChatVertexAI(clientOptions);
     } else if (!EXCLUDED_GENAI_MODELS.test(model)) {
@@ -602,6 +595,7 @@ class GoogleClient extends BaseClient {
     }
 
     this.client = this.createLLM(clientOptions);
+    return this.client;
   }
 
   async getCompletion(_payload, options = {}) {
@@ -848,12 +842,32 @@ class GoogleClient extends BaseClient {
     return 'cl100k_base';
   }
 
+  async getVertexTokenCount(text) {
+    /** @type {ChatVertexAI} */
+    const client = this.client ?? this.initializeClient();
+    const connection = client.connection;
+    const gAuthClient = connection.client;
+    const tokenEndpoint = `https://${connection._endpoint}/${connection.apiVersion}/projects/${this.project_id}/locations/${connection._location}/publishers/google/models/${connection.model}/:countTokens`;
+    const result = await gAuthClient.request({
+      url: tokenEndpoint,
+      method: 'POST',
+      data: {
+        contents: [{ role: 'user', parts: [{ text }] }],
+      },
+    });
+    console.dir(result, { depth: null });
+    return result;
+  }
+
   /**
    * Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
    * @param {string} text - The text to get the token count for.
-   * @returns {number} The token count of the given text.
+   * @returns {Promise<number>} The token count of the given text.
    */
-  getTokenCount(text) {
+  async getTokenCount(text) {
+    if (this.client instanceof ChatVertexAI) {
+      return await this.getVertexTokenCount(text);
+    }
     const encoding = this.getEncoding();
     return Tokenizer.getTokenCount(text, encoding);
   }

From 1e92ec5ace7f41142f3fb25fdac8ef8a6b27110a Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 08:18:51 -0500
Subject: [PATCH 04/12] refactor: enhance GoogleClient with improved type
 definitions and streamline token count method

---
 api/app/clients/GoogleClient.js | 14 +++++++-------
 api/typedefs.js                 |  6 ++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index f476358b133..57c2b9fe4de 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -323,6 +323,10 @@ class GoogleClient extends BaseClient {
     return { prompt: formattedMessages };
   }
 
+  /**
+   * @param {TMessage[]} [messages=[]]
+   * @param {string} [parentMessageId]
+   */
   async buildMessages(messages = [], parentMessageId) {
     if (!this.isGenerativeModel && !this.project_id) {
       throw new Error(
@@ -616,7 +620,7 @@ class GoogleClient extends BaseClient {
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
       /** @type {GenAI} */
       const client = this.client;
-      /** @type {import('@google/generative-ai').GenerateContentRequest} */
+      /** @type {GenerateContentRequest} */
       const requestOptions = {
         safetySettings,
         contents: _payload,
@@ -855,19 +859,15 @@ class GoogleClient extends BaseClient {
         contents: [{ role: 'user', parts: [{ text }] }],
       },
     });
-    console.dir(result, { depth: null });
     return result;
   }
 
   /**
    * Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
    * @param {string} text - The text to get the token count for.
-   * @returns {Promise<number>} The token count of the given text.
+   * @returns {number} The token count of the given text.
    */
-  async getTokenCount(text) {
-    if (this.client instanceof ChatVertexAI) {
-      return await this.getVertexTokenCount(text);
-    }
+  getTokenCount(text) {
     const encoding = this.getEncoding();
     return Tokenizer.getTokenCount(text, encoding);
   }
diff --git a/api/typedefs.js b/api/typedefs.js
index 186c0e4a528..12e6f048614 100644
--- a/api/typedefs.js
+++ b/api/typedefs.js
@@ -155,6 +155,12 @@
  * @memberof typedefs
  */
 
+/**
+ * @exports GenerateContentRequest
+ * @typedef {import('@google/generative-ai').GenerateContentRequest} GenerateContentRequest
+ * @memberof typedefs
+ */
+
 /**
  * @exports AssistantStreamEvent
  * @typedef {import('openai').default.Beta.AssistantStreamEvent} AssistantStreamEvent

From 61b17faa57849be72107b3c4079ffaec0423651f Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 08:39:01 -0500
Subject: [PATCH 05/12] refactor: remove unused methods and consolidate methods

---
 api/app/clients/GoogleClient.js | 49 +--------------------------------
 1 file changed, 1 insertion(+), 48 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 57c2b9fe4de..7f34159f0de 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -1,5 +1,4 @@
 const { google } = require('googleapis');
-const { Agent, ProxyAgent } = require('undici');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
@@ -386,10 +385,7 @@ class GoogleClient extends BaseClient {
       parentMessageId,
     });
 
-    const formattedMessages = orderedMessages.map((message) => ({
-      author: message.isCreatedByUser ? this.userLabel : this.modelLabel,
-      content: message?.content ?? message.text,
-    }));
+    const formattedMessages = orderedMessages.map(this.formatMessages());
 
     let lastAuthor = '';
     let groupedMessages = [];
@@ -523,34 +519,6 @@ class GoogleClient extends BaseClient {
     return { prompt, context };
   }
 
-  async _getCompletion(payload, abortController = null) {
-    if (!abortController) {
-      abortController = new AbortController();
-    }
-    const { debug } = this.options;
-    const url = this.completionsUrl;
-    if (debug) {
-      logger.debug('GoogleClient _getCompletion', { url, payload });
-    }
-    const opts = {
-      method: 'POST',
-      agent: new Agent({
-        bodyTimeout: 0,
-        headersTimeout: 0,
-      }),
-      signal: abortController.signal,
-    };
-
-    if (this.options.proxy) {
-      opts.agent = new ProxyAgent(this.options.proxy);
-    }
-
-    const client = await this.getClient();
-    const res = await client.request({ url, method: 'POST', data: payload });
-    logger.debug('GoogleClient _getCompletion', { res });
-    return res.data;
-  }
-
   createLLM(clientOptions) {
     const model = clientOptions.modelName ?? clientOptions.model;
     clientOptions.location = loc;
@@ -705,21 +673,6 @@ class GoogleClient extends BaseClient {
         generationConfig: googleGenConfigSchema.parse(this.modelOptions),
       };
 
-      let promptPrefix = (this.options.promptPrefix ?? '').trim();
-      if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
-        promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
-      }
-
-      if (this.options?.promptPrefix?.length) {
-        requestOptions.systemInstruction = {
-          parts: [
-            {
-              text: promptPrefix,
-            },
-          ],
-        };
-      }
-
       const result = await client.generateContent(requestOptions);
       reply = result.response?.text();

From 77580cddb59684cd0845842a1a080e6a1a356675 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 08:46:18 -0500
Subject: [PATCH 06/12] refactor: remove examples

---
 api/app/clients/GoogleClient.js | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 7f34159f0de..1d02e84f88e 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -118,10 +118,6 @@ class GoogleClient extends BaseClient {
       this.options = options;
     }
 
-    this.options.examples = (this.options.examples ?? [])
-      .filter((ex) => ex)
-      .filter((obj) => obj.input.content !== '' && obj.output.content !== '');
-
     this.modelOptions = this.options.modelOptions || {};
 
     this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
@@ -365,12 +361,7 @@ class GoogleClient extends BaseClient {
       payload.instances[0].context = promptPrefix;
     }
 
-    if (this.options.examples.length > 0) {
-      payload.instances[0].examples = this.options.examples;
-    }
-
     logger.debug('[GoogleClient] buildMessages', payload);
-
     return { prompt: payload };
   }
 
@@ -574,9 +565,9 @@ class GoogleClient extends BaseClient {
   async getCompletion(_payload, options = {}) {
     const { instances } = _payload;
     const safetySettings = this.getSafetySettings();
     const { onProgress, abortController } = options;
+    const { messages: _messages, context } = instances?.[0] ?? {};
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
-    const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
 
     let reply = '';
@@ -657,7 +648,7 @@ class GoogleClient extends BaseClient {
   async titleChatCompletion(_payload, options = {}) {
     const { abortController } = options;
     const { instances } = _payload;
     const safetySettings = this.getSafetySettings();
-    const { messages: _messages, examples: _examples } = instances?.[0] ?? {};
+    const { messages: _messages } = instances?.[0] ?? {};
 
     let reply = '';
     const messages = this.isTextModel ? _payload.trim() : _messages;

From 75b7980542261a128e61aed74fc6643cd3342a59 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 10:09:20 -0500
Subject: [PATCH 07/12] refactor: improve input handling logic in DynamicInput
 component

---
 .../SidePanel/Parameters/DynamicInput.tsx | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/client/src/components/SidePanel/Parameters/DynamicInput.tsx b/client/src/components/SidePanel/Parameters/DynamicInput.tsx
index 36d7fd179aa..cba8dd66040 100644
--- a/client/src/components/SidePanel/Parameters/DynamicInput.tsx
+++ b/client/src/components/SidePanel/Parameters/DynamicInput.tsx
@@ -48,12 +48,15 @@ function DynamicInput({
   const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
     const value = e.target.value;
 
-    if (type === 'number') {
-      if (!isNaN(Number(value))) {
-        setInputValue(e, true);
-      }
-    } else {
+    if (type !== 'number') {
       setInputValue(e);
+      return;
+    }
+
+    if (value === '') {
+      setInputValue(e);
+    } else if (!isNaN(Number(value))) {
+      setInputValue(e, true);
     }
   };

From 2cacc12c3d22bd61c67fed1a6bc002f86a86a393 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 11:08:13 -0500
Subject: [PATCH 08/12] refactor: enhance GoogleClient with token usage
 tracking and context handling improvements

---
 api/app/clients/GoogleClient.js | 160 ++++++++++++++++++++++++++++----
 api/typedefs.js                 |   6 ++
 2 files changed, 147 insertions(+), 19 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 1d02e84f88e..39e09dc63c5 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -1,4 +1,5 @@
 const { google } = require('googleapis');
+const { concat } = require('@langchain/core/utils/stream');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
@@ -10,11 +11,13 @@ const {
   endpointSettings,
   EModelEndpoint,
   VisionModes,
+  ErrorTypes,
   Constants,
   AuthKeys,
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const Tokenizer = require('~/server/services/Tokenizer');
+const { spendTokens } = require('~/models/spendTokens');
 const { getModelMaxTokens } = require('~/utils');
 const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
@@ -59,6 +62,15 @@ class GoogleClient extends BaseClient {
 
     this.authHeader = options.authHeader;
 
+    /** @type {UsageMetadata | undefined} */
+    this.usage;
+    /** The key for the usage object's input tokens
+     * @type {string} */
+    this.inputTokensKey = 'input_tokens';
+    /** The key for the usage object's output tokens
+     * @type {string} */
+    this.outputTokensKey = 'output_tokens';
+
     if (options.skipSetOptions) {
       return;
     }
@@ -170,6 +182,11 @@ class GoogleClient extends BaseClient {
       this.completionsUrl = this.constructUrl();
     }
 
+    let promptPrefix = (this.options.promptPrefix ?? '').trim();
+    if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
+      promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
+    }
+    this.options.promptPrefix = promptPrefix;
     this.initializeClient();
     return this;
   }
@@ -322,19 +339,56 @@ class GoogleClient extends BaseClient {
    * @param {TMessage[]} [messages=[]]
    * @param {string} [parentMessageId]
    */
-  async buildMessages(messages = [], parentMessageId) {
+  async buildMessages(_messages = [], parentMessageId) {
     if (!this.isGenerativeModel && !this.project_id) {
       throw new Error(
         '[GoogleClient] a Service Account JSON Key is required for PaLM 2 and Codey models (Vertex AI)',
       );
     }
 
+    if (this.options.promptPrefix) {
+      const instructionsTokenCount = this.getTokenCount(this.options.promptPrefix);
+
+      this.maxContextTokens = this.maxContextTokens - instructionsTokenCount;
+      if (this.maxContextTokens < 0) {
+        const info = `${instructionsTokenCount} / ${this.maxContextTokens}`;
+        const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
+        logger.warn(`Instructions token count exceeds max context (${info}).`);
+        throw new Error(errorMessage);
+      }
+    }
+
+    for (let i = 0; i < _messages.length; i++) {
+      const message = _messages[i];
+      if (!message.tokenCount) {
+        _messages[i].tokenCount = this.getTokenCountForMessage({
+          role: message.isCreatedByUser ? 'user' : 'assistant',
+          content: message.content ?? message.text,
+        });
+      }
+    }
+
+    const {
+      payload: messages,
+      tokenCountMap,
+      promptTokens,
+    } = await this.handleContextStrategy({
+      orderedMessages: _messages,
+      formattedMessages: _messages,
+    });
+
     if (!this.project_id && !EXCLUDED_GENAI_MODELS.test(this.modelOptions.model)) {
-      return await this.buildGenerativeMessages(messages);
+      const result = await this.buildGenerativeMessages(messages);
+      result.tokenCountMap = tokenCountMap;
+      result.promptTokens = promptTokens;
+      return result;
     }
 
     if (this.options.attachments && this.isGenerativeModel) {
-      return this.buildVisionMessages(messages, parentMessageId);
+      const result = this.buildVisionMessages(messages, parentMessageId);
+      result.tokenCountMap = tokenCountMap;
+      result.promptTokens = promptTokens;
+      return result;
     }
 
     if (this.isTextModel) {
@@ -352,17 +406,12 @@ class GoogleClient extends BaseClient {
       ],
     };
 
-    let promptPrefix = (this.options.promptPrefix ?? '').trim();
-    if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
-      promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
-    }
-
-    if (promptPrefix) {
-      payload.instances[0].context = promptPrefix;
+    if (this.options.promptPrefix) {
+      payload.instances[0].context = this.options.promptPrefix;
     }
 
     logger.debug('[GoogleClient] buildMessages', payload);
-    return { prompt: payload };
+    return { prompt: payload, tokenCountMap, promptTokens };
   }
 
@@ -405,9 +454,6 @@ class GoogleClient extends BaseClient {
     }
 
     let promptPrefix = (this.options.promptPrefix ?? '').trim();
-    if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
-      promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
-    }
 
     if (identityPrefix) {
       promptPrefix = `${identityPrefix}${promptPrefix}`;
@@ -628,11 +674,7 @@ class GoogleClient extends BaseClient {
         generationConfig: googleGenConfigSchema.parse(this.modelOptions),
       };
 
-      let promptPrefix = (this.options.promptPrefix ?? '').trim();
-      if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
-        promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
-      }
-
+      const promptPrefix = (this.options.promptPrefix ?? '').trim();
       if (promptPrefix.length) {
         requestOptions.systemInstruction = {
           parts: [
@@ -646,8 +688,13 @@ class GoogleClient extends BaseClient {
       }
 
       const delay = modelName.includes('flash') ? 8 : 15;
+      /** @type {GenAIUsageMetadata} */
+      let usageMetadata;
       const result = await client.generateContentStream(requestOptions);
       for await (const chunk of result.stream) {
+        usageMetadata = !usageMetadata
+          ? chunk?.usageMetadata
+          : Object.assign(usageMetadata, chunk?.usageMetadata);
         const chunkText = chunk.text();
         await this.generateTextStream(chunkText, onProgress, {
           delay,
@@ -657,10 +704,21 @@ class GoogleClient extends BaseClient {
         reply += chunkText;
         await sleep(streamRate);
       }
+
+      if (usageMetadata) {
+        this.usage = {
+          input_tokens: usageMetadata.promptTokenCount,
+          output_tokens: usageMetadata.candidatesTokenCount,
+        };
+      }
       return reply;
     }
 
+    /** @type {import('@langchain/core/messages').AIMessageChunk['usage_metadata']} */
+    let usageMetadata;
     const stream = await this.client.stream(messages, {
       signal: abortController.signal,
+      streamUsage: true,
       safetySettings,
     });
@@ -672,6 +730,9 @@ class GoogleClient extends BaseClient {
     for await (const chunk of stream) {
+      usageMetadata = !usageMetadata
+        ? chunk?.usage_metadata
+        : concat(usageMetadata, chunk?.usage_metadata);
       const chunkText = chunk?.content ?? chunk;
       await this.generateTextStream(chunkText, onProgress, {
         delay,
@@ -680,9 +741,72 @@ class GoogleClient extends BaseClient {
       reply += chunkText;
     }
 
+    if (usageMetadata) {
+      this.usage = usageMetadata;
+    }
     return reply;
   }
 
+  /**
+   * Get stream usage as returned by this client's API response.
+   * @returns {UsageMetadata} The stream usage object.
+   */
+  getStreamUsage() {
+    return this.usage;
+  }
+
+  /**
+   * Calculates the correct token count for the current user message based on the token count map and API usage.
+   * Edge case: If the calculation results in a negative value, it returns the original estimate.
+   * If revisiting a conversation with a chat history entirely composed of token estimates,
+   * the cumulative token count going forward should become more accurate as the conversation progresses.
+   * @param {Object} params - The parameters for the calculation.
+   * @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
+   * @param {string} params.currentMessageId - The ID of the current message to calculate.
+   * @param {UsageMetadata} params.usage - The usage object returned by the API.
+   * @returns {number} The correct token count for the current user message.
+   */
+  calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
+    const originalEstimate = tokenCountMap[currentMessageId] || 0;
+
+    if (!usage || typeof usage.input_tokens !== 'number') {
+      return originalEstimate;
+    }
+
+    tokenCountMap[currentMessageId] = 0;
+    const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
+      const numCount = Number(count);
+      return sum + (isNaN(numCount) ? 0 : numCount);
+    }, 0);
+    const totalInputTokens = usage.input_tokens ?? 0;
+    const currentMessageTokens = totalInputTokens - totalTokensFromMap;
+    return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
+  }
+
+  /**
+   * @param {object} params
+   * @param {number} params.promptTokens
+   * @param {number} params.completionTokens
+   * @param {UsageMetadata} [params.usage]
+   * @param {string} [params.model]
+   * @param {string} [params.context='message']
+   * @returns {Promise<void>}
+   */
+  async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) {
+    await spendTokens(
+      {
+        context,
+        user: this.user,
+        conversationId: this.conversationId,
+        model: model ?? this.modelOptions.model,
+        endpointTokenConfig: this.options.endpointTokenConfig,
+      },
+      { promptTokens, completionTokens },
+    );
+  }
+
   /**
    * Stripped-down logic for generating a title. This uses the non-streaming APIs, since the user does not see titles streaming
    */
@@ -726,6 +850,7 @@ class GoogleClient extends BaseClient {
       endpointType: null,
       artifacts: this.options.artifacts,
       promptPrefix: this.options.promptPrefix,
+      maxContextTokens: this.options.maxContextTokens,
       modelLabel: this.options.modelLabel,
       iconURL: this.options.iconURL,
       greeting: this.options.greeting,
diff --git a/api/typedefs.js b/api/typedefs.js
index 12e6f048614..b1960f4cb60 100644
--- a/api/typedefs.js
+++ b/api/typedefs.js
@@ -161,6 +161,12 @@
  * @memberof typedefs
  */
 
+/**
+ * @exports GenAIUsageMetadata
+ * @typedef {import('@google/generative-ai').UsageMetadata} GenAIUsageMetadata
+ * @memberof typedefs
+ */
+
 /**
  * @exports AssistantStreamEvent
  * @typedef {import('openai').default.Beta.AssistantStreamEvent} AssistantStreamEvent

From 7668e3cb865b0969854f24148135a4dbd83a6f16 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 11:34:09 -0500
Subject: [PATCH 09/12] refactor: update GoogleClient to support 'learnlm'
 model and streamline model checks

---
 api/app/clients/GoogleClient.js                  | 30 +++++++-----------
 api/server/services/Endpoints/google/llm.js      | 15 +---------
 .../components/Endpoints/MessageEndpointIcon.tsx | 10 +++++--
 packages/data-provider/src/parsers.ts            |  2 +-
 4 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 39e09dc63c5..a031eb67192 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -135,12 +135,8 @@ class GoogleClient extends BaseClient {
     this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
 
     /** @type {boolean} Whether using a "GenerativeAI" Model */
-    this.isGenerativeModel = this.modelOptions.model.includes('gemini');
-    const { isGenerativeModel } = this;
-    this.isChatModel = !isGenerativeModel && this.modelOptions.model.includes('chat');
-    const { isChatModel } = this;
-    this.isTextModel =
-      !isGenerativeModel && !isChatModel && /code|text/.test(this.modelOptions.model);
+    this.isGenerativeModel =
+      this.modelOptions.model.includes('gemini') || this.modelOptions.model.includes('learnlm');
 
     this.maxContextTokens =
       this.options.maxContextTokens ??
@@ -341,9 +337,7 @@ class GoogleClient extends BaseClient {
   async buildMessages(_messages = [], parentMessageId) {
     if (!this.isGenerativeModel && !this.project_id) {
-      throw new Error(
-        '[GoogleClient] a Service Account JSON Key is required for PaLM 2 and Codey models (Vertex AI)',
-      );
+      throw new Error('[GoogleClient] PaLM 2 and Codey models are no longer supported.');
     }
 
     if (this.options.promptPrefix) {
@@ -608,15 +602,12 @@ class GoogleClient extends BaseClient {
   async getCompletion(_payload, options = {}) {
-    const { instances } = _payload;
     const safetySettings = this.getSafetySettings();
     const { onProgress, abortController } = options;
-    const { messages: _messages, context } = instances?.[0] ?? {};
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
     const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
 
     let reply = '';
-    const messages = this.isTextModel ? _payload.trim() : _messages;
 
     if (!this.isVisionModel && context && messages?.length > 0) {
       messages.unshift(new SystemMessage(context));
     }
@@ -668,6 +659,9 @@ class GoogleClient extends BaseClient {
       return reply;
     }
 
+    const { instances } = _payload;
+    const { messages: messages, context } = instances?.[0] ?? {};
+
     /** @type {import('@langchain/core/messages').AIMessageChunk['usage_metadata']} */
     let usageMetadata;
     const stream = await this.client.stream(messages, {
@@ -757,31 +751,29 @@ class GoogleClient extends BaseClient {
   async titleChatCompletion(_payload, options = {}) {
     const { abortController } = options;
-    const { instances } = _payload;
     const safetySettings = this.getSafetySettings();
-    const { messages: _messages } = instances?.[0] ?? {};
 
     let reply = '';
-    const messages = this.isTextModel ? _payload.trim() : _messages;
 
     const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
       logger.debug('Identified titling model as GenAI version');
       /** @type {GenerativeModel} */
       const client = this.client;
       const requestOptions = {
         contents: _payload,
         safetySettings,
-        generationConfig: googleGenConfigSchema.parse(this.modelOptions),
+        generationConfig: {
+          temperature: 0.5,
+        },
       };
 
       const result = await client.generateContent(requestOptions);
-
       reply = result.response?.text();
-
       return reply;
     } else {
-      logger.debug('Beginning titling');
+      const { instances } = _payload;
+      const { messages } = instances?.[0] ?? {};
       const titleResponse = await this.client.invoke(messages, {
         signal: abortController.signal,
         timeout: 7000,
         safetySettings,
diff --git a/api/server/services/Endpoints/google/llm.js b/api/server/services/Endpoints/google/llm.js
index f19d0539c72..ae5e268ac3a 100644
--- a/api/server/services/Endpoints/google/llm.js
+++ b/api/server/services/Endpoints/google/llm.js
@@ -1,9 +1,6 @@
 const { Providers } = require('@librechat/agents');
 const { AuthKeys } = require('librechat-data-provider');
 
-// Example internal constant from your code
-const EXCLUDED_GENAI_MODELS = /gemini-(?:1\.0|1-0|pro)/;
-
 /**
  *
  * @param {boolean} isGemini2
@@ -89,22 +86,12 @@ function getLLMConfig(credentials, options = {}) {
   /** Used only for Safety Settings */
   const isGemini2 = llmConfig.model.includes('gemini-2.0') && !llmConfig.model.includes('thinking');
-  const isGenerativeModel = llmConfig.model.includes('gemini');
-  const isChatModel = !isGenerativeModel && llmConfig.model.includes('chat');
-  const isTextModel = !isGenerativeModel && !isChatModel && /code|text/.test(llmConfig.model);
 
   llmConfig.safetySettings = getSafetySettings(isGemini2);
 
   let provider;
 
-  if (project_id && isTextModel) {
-    provider = Providers.VERTEXAI;
-  } else if (project_id && isChatModel) {
+  if (project_id) {
     provider = Providers.VERTEXAI;
-  } else if (project_id) {
-    provider = Providers.VERTEXAI;
-  } else if (!EXCLUDED_GENAI_MODELS.test(llmConfig.model)) {
-    provider = Providers.GOOGLE;
   } else {
     provider = Providers.GOOGLE;
   }
diff --git a/client/src/components/Endpoints/MessageEndpointIcon.tsx b/client/src/components/Endpoints/MessageEndpointIcon.tsx
index 5c6b35bf7f3..a4d4488579a 100644
--- a/client/src/components/Endpoints/MessageEndpointIcon.tsx
+++ b/client/src/components/Endpoints/MessageEndpointIcon.tsx
@@ -34,7 +34,10 @@ function getOpenAIColor(_model: string | null | undefined) {
 function getGoogleIcon(model: string | null | undefined, size: number) {
   if (model?.toLowerCase().includes('code') === true) {
     return <CodeyIcon size={size * 0.75} />;
-  } else if (model?.toLowerCase().includes('gemini') === true) {
+  } else if (
+    model?.toLowerCase().includes('gemini') === true ||
+    model?.toLowerCase().includes('learnlm') === true
+  ) {
     return <GeminiIcon size={size * 0.7} />;
   } else {
     return <PaLMIcon size={size * 0.7} />;
@@ -44,7 +47,10 @@ function getGoogleIcon(model: string | null | undefined, size: number) {
 function getGoogleModelName(model: string | null | undefined) {
   if (model?.toLowerCase().includes('code') === true) {
     return 'Codey';
-  } else if (model?.toLowerCase().includes('gemini') === true) {
+  } else if (
+    model?.toLowerCase().includes('gemini') === true ||
+    model?.toLowerCase().includes('learnlm') === true
+  ) {
     return 'Gemini';
   } else {
     return 'PaLM2';
diff --git a/packages/data-provider/src/parsers.ts b/packages/data-provider/src/parsers.ts
index 656b54c29e3..71b30449176 100644
--- a/packages/data-provider/src/parsers.ts
+++ b/packages/data-provider/src/parsers.ts
@@ -272,7 +272,7 @@ export const getResponseSender = (endpointOption: t.TEndpointOption): string =>
   if (endpoint === EModelEndpoint.google) {
     if (modelLabel) {
       return modelLabel;
-    } else if (model && model.includes('gemini')) {
+    } else if (model && (model.includes('gemini') || model.includes('learnlm'))) {
       return 'Gemini';
     } else if (model && model.includes('code')) {
       return 'Codey';

From f11bb0d73469bff91c94d0255ad9c9c4fb4b9911 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 11:35:12 -0500
Subject: [PATCH 10/12] refactor: remove unused text model handling in
 GoogleClient

---
 api/app/clients/GoogleClient.js | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index a031eb67192..5a70e02549e 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -385,10 +385,6 @@ class GoogleClient extends BaseClient {
       return result;
     }
 
-    if (this.isTextModel) {
-      return this.buildMessagesPrompt(messages, parentMessageId);
-    }
-
     let payload = {
       instances: [
         {

From 093ce85d41425e75c8a08ef5918267b1d3ee7c5f Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 12:00:59 -0500
Subject: [PATCH 11/12] refactor: record token usage for GoogleClient titles
 and handle edge cases

---
 api/app/clients/GoogleClient.js | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 5a70e02549e..8ae20accd57 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -605,10 +605,6 @@ class GoogleClient extends BaseClient {
 
     let reply = '';
 
-    if (!this.isVisionModel && context && messages?.length > 0) {
-      messages.unshift(new SystemMessage(context));
-    }
-
     if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
       /** @type {GenAI} */
       const client = this.client;
@@ -658,6 +654,10 @@ class GoogleClient extends BaseClient {
     const { instances } = _payload;
     const { messages: messages, context } = instances?.[0] ?? {};
 
+    if (!this.isVisionModel && context && messages?.length > 0) {
+      messages.unshift(new SystemMessage(context));
+    }
+
     /** @type {import('@langchain/core/messages').AIMessageChunk['usage_metadata']} */
     let usageMetadata;
     const stream = await this.client.stream(messages, {
@@ -743,7 +743,7 @@ class GoogleClient extends BaseClient {
     await spendTokens(
       {
         context,
-        user: this.user,
+        user: this.user ?? this.options.req?.user?.id,
         conversationId: this.conversationId,
         model: model ?? this.modelOptions.model,
         endpointTokenConfig: this.options.endpointTokenConfig,
@@ -761,8 +761,8 @@ class GoogleClient extends BaseClient {
 
     let reply = '';
 
-    const modelName = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
-    if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
+    const model = this.modelOptions.modelName ?? this.modelOptions.model ?? '';
+    if (!EXCLUDED_GENAI_MODELS.test(model) && !this.project_id) {
       logger.debug('Identified titling model as GenAI version');
       /** @type {GenerativeModel} */
       const client = this.client;
@@ -786,8 +786,16 @@ class GoogleClient extends BaseClient {
         safetySettings,
       });
 
+      if (titleResponse.usage_metadata) {
+        await this.recordTokenUsage({
+          model,
+          promptTokens: titleResponse.usage_metadata.input_tokens,
+          completionTokens: titleResponse.usage_metadata.output_tokens,
+          context: 'title',
+        });
+      }
+
       reply = titleResponse.content;
-      // TODO: RECORD TOKEN USAGE
       return reply;
     }
   }
@@ -811,14 +819,19 @@ class GoogleClient extends BaseClient {
       },
     ]);
 
+    const model = process.env.GOOGLE_TITLE_MODEL ?? this.modelOptions.model;
+    const availableModels = this.options.modelsConfig?.[EModelEndpoint.google];
+    this.isVisionModel = validateVisionModel({ model, availableModels });
+
     if (this.isVisionModel) {
       logger.warn(
         `Current vision model does not support titling without an attachment; falling back to default model ${settings.model.default}`,
       );
       this.modelOptions.model = settings.model.default;
     }
 
     try {
+      this.initializeClient();
       title = await this.titleChatCompletion(payload, {
         abortController: new AbortController(),
         onProgress: () => {},

From 9ab61c362943cf0f8562e001b2439eaf10f9edda Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Mon, 27 Jan 2025 12:14:28 -0500
Subject: [PATCH 12/12] chore: remove unused undici, addresses verbose version
 warning

---
 api/app/clients/ChatGPTClient.js |  9 ---------
 api/package.json                 |  1 -
 package-lock.json                | 10 ----------
 3 files changed, 20 deletions(-)

diff --git a/api/app/clients/ChatGPTClient.js b/api/app/clients/ChatGPTClient.js
index 6a7ba7b9896..5450300a178 100644
--- a/api/app/clients/ChatGPTClient.js
+++ b/api/app/clients/ChatGPTClient.js
@@ -13,7 +13,6 @@ const {
 const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils');
 const { createContextHandlers } = require('./prompts');
 const { createCoherePayload } = require('./llm');
-const { Agent, ProxyAgent } = require('undici');
 const BaseClient = require('./BaseClient');
 const { logger } = require('~/config');
 
@@ -186,10 +185,6 @@ class ChatGPTClient extends BaseClient {
       headers: {
         'Content-Type': 'application/json',
       },
-      dispatcher: new Agent({
-        bodyTimeout: 0,
-        headersTimeout: 0,
-      }),
     };
 
     if (this.isVisionModel) {
@@ -275,10 +270,6 @@ class ChatGPTClient extends BaseClient {
       opts.headers['X-Title'] = 'LibreChat';
     }
 
-    if (this.options.proxy) {
-      opts.dispatcher = new ProxyAgent(this.options.proxy);
-    }
-
     /* hacky fixes for Mistral AI API:
       - Re-orders system message to the top of the messages payload, as not allowed anywhere else
      - If there is only one message and it's a system message, change the role to user
diff --git a/api/package.json b/api/package.json
index 80c07afd676..fe8b1f1f280 100644
--- a/api/package.json
+++ b/api/package.json
@@ -102,7 +102,6 @@
     "tiktoken": "^1.0.15",
     "traverse": "^0.6.7",
     "ua-parser-js": "^1.0.36",
-    "undici": "^7.2.3",
     "winston": "^3.11.0",
     "winston-daily-rotate-file": "^4.7.1",
     "zod": "^3.22.4"
diff --git a/package-lock.json b/package-lock.json
index 0b2c165e372..908eef40952 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -111,7 +111,6 @@
         "tiktoken": "^1.0.15",
         "traverse": "^0.6.7",
         "ua-parser-js": "^1.0.36",
-        "undici": "^7.2.3",
         "winston": "^3.11.0",
         "winston-daily-rotate-file": "^4.7.1",
         "zod": "^3.22.4"
@@ -33055,15 +33054,6 @@
       "integrity": "sha512-WxONCrssBM8TSPRqN5EmsjVrsv4A8X12J4ArBiiayv3DyyG3ZlIg6yysuuSYdZsVz3TKcTg2fd//Ujd4CHV1iA==",
       "dev": true
     },
-    "node_modules/undici": {
-      "version": "7.2.3",
-      "resolved": "https://registry.npmjs.org/undici/-/undici-7.2.3.tgz",
-      "integrity": "sha512-2oSLHaDalSt2/O/wHA9M+/ZPAOcU2yrSP/cdBYJ+YxZskiPYDSqHbysLSlD7gq3JMqOoJI5O31RVU3BxX/MnAA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=20.18.1"
-      }
-    },
    "node_modules/undici-types": {
      "version": "5.26.5",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",