From 29e26d383efcecf8ce41b105f06e89f06b60fa9d Mon Sep 17 00:00:00 2001 From: anilb Date: Fri, 5 Sep 2025 10:39:24 +0200 Subject: [PATCH 01/21] feat: copilot - keeping track of conversation_id Signed-off-by: anilb --- .gitignore | 4 +- ...3486__addConversationIdToChatResponses.sql | 8 ++ .../copilot/components/copilot-sidebar.vue | 36 ++++++--- .../copilot/store/copilot.api.service.ts | 32 ++++++-- .../modules/copilot/types/copilot.types.ts | 3 +- frontend/lib/chat/agents/base-agent.ts | 1 - frontend/lib/chat/data-copilot.ts | 78 +++++++++---------- frontend/server/api/chat/stream.ts | 7 +- frontend/server/repo/chat.repo.ts | 10 ++- 9 files changed, 117 insertions(+), 62 deletions(-) create mode 100644 database/migrations/V1756973486__addConversationIdToChatResponses.sql diff --git a/.gitignore b/.gitignore index 0c25f88c9..bb828f401 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ scripts/scaffold.yaml node_modules .prettierrc **/.env* -!.env.dist \ No newline at end of file +!.env.dist +database/Dockerfile.flyway +database/flyway_migrate.sh \ No newline at end of file diff --git a/database/migrations/V1756973486__addConversationIdToChatResponses.sql b/database/migrations/V1756973486__addConversationIdToChatResponses.sql new file mode 100644 index 000000000..df938665b --- /dev/null +++ b/database/migrations/V1756973486__addConversationIdToChatResponses.sql @@ -0,0 +1,8 @@ +ALTER TABLE chat_responses +ADD COLUMN conversation_id UUID DEFAULT gen_random_uuid(); + +-- Create index for efficient conversation queries +CREATE INDEX idx_chat_responses_conversation_id ON chat_responses(conversation_id); + +-- Create index for efficient conversation + timestamp queries +CREATE INDEX idx_chat_responses_conversation_created_at ON chat_responses(conversation_id, created_at); \ No newline at end of file diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue index 850c2627b..8ce9fbc2e 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue @@ -139,6 +139,7 @@ const input = ref('') const streamingStatus = ref('') const error = ref('') const messages = ref>([]) // tempData as AIMessage +const conversationId = ref(undefined) const isLoading = computed({ get: () => props.isLoading, @@ -176,15 +177,17 @@ const callChatApi = async (userMessage: string) => { copilotDefaults.value.project, copilotDefaults.value.widget, token.value, - copilotDefaults.value.params) + copilotDefaults.value.params, + conversationId.value) // Handle the streaming response - await copilotApiService.handleStreamingResponse(response, messages.value, (status) => { - streamingStatus.value = status; - }, (message, index) => { - if (index === -1) { - messages.value.push(message); - } else { + const returnedConversationId = await copilotApiService.handleStreamingResponse( + response, messages.value, (status) => { + streamingStatus.value = status; + }, (message, index) => { + if (index === -1) { + messages.value.push(message); + } else { messages.value[index] = message; } @@ -200,10 +203,19 @@ const callChatApi = async (userMessage: string) => { selectedResultId.value = message.id; } scrollToEnd(); - }, () => { + }, (receivedConversationId) => { isLoading.value = false; streamingStatus.value = ''; + // Store the conversationId for subsequent calls + if (receivedConversationId) { + conversationId.value 
= receivedConversationId; + } }); + + // Also capture conversationId from the return value as backup + if (returnedConversationId && !conversationId.value) { + conversationId.value = returnedConversationId; + } } } catch (err) { console.error('Failed to send message:', err) @@ -233,7 +245,13 @@ const selectResult = (id: string) => { selectedResultId.value = id; } -watch(copilotDefaults, (newDefaults) => { +watch(copilotDefaults, (newDefaults, oldDefaults) => { + // Clear conversation when widget changes + if (oldDefaults && newDefaults.widget !== oldDefaults.widget) { + conversationId.value = undefined; + messages.value = []; + } + if (newDefaults.question) { callChatApi(newDefaults.question); } diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index 43c096d9a..d3a4a91eb 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -11,7 +11,7 @@ import type { Project } from '~~/types/project' export const tempData = testData3 as AIMessage[]; class CopilotApiService { // Generate unique ID for messages - generateId = () => Date.now().toString(36) + Math.random().toString(36).substr(2); + generateId = () => Date.now().toString(36) + Math.random().toString(36).substring(2); generateTextMessage = (message: string, role: MessageRole, status: MessageStatus, type: MessagePartType = 'text') => { const userMessageId = this.generateId(); @@ -31,7 +31,8 @@ class CopilotApiService { project: Project, pipe: string, token: string, - parameters?: CopilotParams): Promise { + parameters?: CopilotParams, + conversationId?: string): Promise { // Prepare the request body with the correct format const requestBody = { messages: messages.map(m => ({ @@ -41,7 +42,8 @@ class CopilotApiService { pipe, segmentId: project?.id, projectName: project?.name, - parameters + parameters, + conversationId } // Send streaming request const response = await fetch('/api/chat/stream', { @@ -121,8 +123,8 @@ class CopilotApiService { messages: Array, statusCallBack: (status: string) => void, messageCallBack: (message: AIMessage, index: number) => void, - completionCallBack: () => void - ) { + completionCallBack: (conversationId?: string) => void + ): Promise { const reader = response.body?.getReader() const decoder = new TextDecoder() @@ -132,6 +134,7 @@ class CopilotApiService { let assistantContent = '' let assistantMessageId: string | null = null + let conversationId: string | undefined = undefined let lineBuffer = '' // Buffer to accumulate partial lines try { @@ -176,13 +179,18 @@ class CopilotApiService { if (result) { assistantContent = result.assistantContent assistantMessageId = result.assistantMessageId + if (result.conversationId) { + conversationId = result.conversationId + } } } } } finally { reader.releaseLock() - completionCallBack(); + completionCallBack(conversationId); } + + return conversationId } private processCompleteLine( @@ -192,7 +200,7 @@ class CopilotApiService { messages: Array, statusCallBack: (status: string) => void, messageCallBack: (message: AIMessage, index: number) => void - ): { assistantMessageId: string | null; assistantContent: string } | null { + ): { assistantMessageId: string | null; assistantContent: string; conversationId?: string } | null { try { // Parse AI SDK data stream format: "prefix:data" const colonIndex = line.indexOf(':') @@ -206,6 +214,8 @@ class 
CopilotApiService { // Handle different stream prefixes if (prefix === '2') { assistantMessageId = null; + let capturedConversationId: string | undefined = undefined; + // Custom data events from your backend (like router-status) const dataArray = JSON.parse(dataString) for (const data of dataArray) { @@ -236,6 +246,11 @@ class CopilotApiService { statusCallBack('Tool execution completed'); } + // Capture conversationId from chat-response-id for return + if (data.type === 'chat-response-id' && data.conversationId) { + capturedConversationId = data.conversationId; + } + const content = data.type === 'chat-response-id' ? data.id : data.explanation // Create assistant message if it doesn't exist yet @@ -253,10 +268,13 @@ class CopilotApiService { content, explanation: data.explanation, instructions: data.instructions, + conversationId: data.conversationId, timestamp: Date.now() }, -1); } } + + return { assistantMessageId, assistantContent, conversationId: capturedConversationId } } else if (prefix === '0') { // Text delta from streamText (streaming text content) const textDelta = JSON.parse(dataString) diff --git a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts index 7cc742a36..3171c5995 100644 --- a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts +++ b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts @@ -15,7 +15,7 @@ export interface CopilotData { export type CopilotParams = Record; -export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id'; +export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id' | 'conversation-id'; export type MessageStatus = 'analyzing' | 'complete' | 'error'; export type MessageData = Record; export type MessageRole = 'user' | 'assistant'; @@ -38,6 +38,7 @@ export interface AIMessage { status: MessageStatus; content: string; timestamp: number; + conversationId?: string; sql?: string; data?: Array; explanation?: string; diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 08bc259c3..e5830efe4 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -104,7 +104,6 @@ export abstract class BaseAgent { const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + jsonInstructions // Check if we have messages in the input - const hasMessages = typeof input === 'object' && input !== null && diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 94355bee9..9ee2b3f93 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -9,11 +9,8 @@ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/ import type { ChatResponse } from '../../server/repo/chat.repo' import { runRouterAgent } from './agents/router' -// TODO: Uncomment once we support text-to-sql -// import { runTextToSqlAgent } from './agents/text-to-sql'; +import { runTextToSqlAgent } from './agents/text-to-sql'; import { runPipeAgent } from './agents/pipe' -// TODO: Uncomment once we support text-to-sql -// eslint-disable-next-line @typescript-eslint/no-unused-vars import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' import type { ChatMessage } from './types' @@ -29,6 +26,7 @@ export async function streamingAgentRequestHandler({ projectName, pipe, parameters, + 
conversationId, onResponseComplete, }: { messages: ChatMessage[] @@ -36,6 +34,7 @@ export async function streamingAgentRequestHandler({ projectName?: string pipe: string parameters?: Record + conversationId?: string onResponseComplete?: (response: ChatResponse) => Promise }): Promise { const url = new URL( @@ -132,12 +131,14 @@ export async function streamingAgentRequestHandler({ pipeInstructions: undefined, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } return @@ -168,12 +169,14 @@ export async function streamingAgentRequestHandler({ pipeInstructions: undefined, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } return @@ -187,44 +190,38 @@ export async function streamingAgentRequestHandler({ }) const followUpTools: Record = {} - // TODO: Uncomment once we support text-to-sql - // if (routerOutput.next_action === "create_query") { - followUpTools['list_datasources'] = tbTools['list_datasources'] - // TODO: Uncomment once we support text-to-sql - // } - // else { - for (const toolName of routerOutput.tools) { + if (routerOutput.next_action === "create_query") { + followUpTools['text_to_sql'] = tbTools['text_to_sql'] + } else { + for (const toolName of routerOutput.tools) { if (tbTools[toolName]) { followUpTools[toolName] = tbTools[toolName] } } - // TODO: Uncomment once we support text-to-sql - // } - // if (routerOutput.next_action === "create_query") { - // const textToSqlOutput = await runTextToSqlAgent({ - // model, - // messages, - // tools: followUpTools, - // date: dateString as string, - // projectName: projectName as string, - // pipe, - // parametersString, - // segmentId: segmentId as string, - // reformulatedQuestion: routerOutput.reformulated_question, - // }); - - // // Execute the SQL query according to the instructions - // const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions); + if (routerOutput.next_action === "create_query") { + const textToSqlOutput = await runTextToSqlAgent({ + model, + messages, + tools: followUpTools, + date: dateString as string, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + }) + // Execute the SQL query according to the instructions + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: "sql-result", + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData + }) - // dataStream.writeData({ - // type: "sql-result", - // explanation: textToSqlOutput.explanation, - // instructions: textToSqlOutput.instructions, - // data: queryData - // }); - // } else - if (routerOutput.next_action === 'pipes') { + } else if (routerOutput.next_action === 'pipes') { const pipeOutput = await runPipeAgent({ model, messages, @@ -274,15 +271,18 @@ export async function streamingAgentRequestHandler({ pipeInstructions: pipeOutput.instructions, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID 
and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } } + } } catch (error) { dataStream.writeData({ type: 'router-status', diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 111c58549..c49659b66 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -13,17 +13,21 @@ interface IStreamRequestBody { projectName?: string pipe: string parameters?: Record + conversationId?: string } export default defineEventHandler(async (event): Promise => { try { - const { messages, segmentId, projectName, pipe, parameters } = + const { messages, segmentId, projectName, pipe, parameters, conversationId } = await readBody(event) if (!pipe) { return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) } + // Generate conversationId if not provided + const finalConversationId = conversationId || crypto.randomUUID() + const dbPool = event.context.dbPool as Pool return await streamingAgentRequestHandler({ @@ -32,6 +36,7 @@ export default defineEventHandler(async (event): Promise => { projectName, pipe, parameters, + conversationId: finalConversationId, onResponseComplete: dbPool ? async (response) => { const chatRepo = new ChatRepository(dbPool) diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index 3f1166282..0e60880fb 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -43,6 +43,7 @@ export interface PipeInstructions { export interface ChatResponse { id?: string + conversationId?: string userPrompt: string routerResponse: 'pipes' | 'text-to-sql' | 'stop' routerReason: string @@ -66,12 +67,14 @@ export class ChatRepository { router_response, router_reason, pipe_instructions, - sql_query, model, + sql_query, + model, input_tokens, output_tokens, - feedback + feedback, + conversation_id ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING id ` @@ -86,6 +89,7 @@ export class ChatRepository { response.inputTokens, response.outputTokens, null, + response.conversationId, ]) return result.rows[0].id From 6fdcb83b35982f44aed43e6d4e76c905ced54b87 Mon Sep 17 00:00:00 2001 From: anilb Date: Mon, 8 Sep 2025 12:40:36 +0200 Subject: [PATCH 02/21] feat: using conversations for history and context Signed-off-by: anilb --- .../copilot/components/copilot-modal.vue | 4 +-- .../copilot/components/copilot-sidebar.vue | 12 ++----- .../components/results/chart-results.vue | 4 +-- .../components/results/results-section.vue | 6 ++-- .../copilot/store/copilot.api.service.ts | 4 +-- .../modules/copilot/types/copilot.types.ts | 8 ++++- frontend/lib/chat/chart/generator.ts | 6 +++- frontend/lib/chat/data-copilot.ts | 15 +++++---- frontend/server/api/chat/chart.ts | 31 +++++++++++++++++-- frontend/server/repo/chat.repo.ts | 12 +++++++ 10 files changed, 72 insertions(+), 30 deletions(-) diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue b/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue index 4b8b7f9a8..9df91ef19 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue @@ -80,11 +80,11 @@ const isModalOpen = computed({ } }) -const handleDataUpdate = (id: string, data: MessageData[], routerReasoning?: string) => { +const handleDataUpdate = (id: string, data: 
MessageData[], conversationId?: string) => { resultData.value.push({ id, data, - routerReasoning + conversationId }); diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue index 8ce9fbc2e..70f407cb2 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue @@ -124,7 +124,7 @@ const emit = defineEmits<{ (e: 'update:selectedResult', value: string): void; (e: 'update:isLoading', value: boolean): void; (e: 'update:error', value: string): void; - (e: 'update:data', id: string, value: MessageData[], routerReasoning?: string): void; + (e: 'update:data', id: string, value: MessageData[], conversationId?: string): void; }>(); const { copilotDefaults, selectedResultId } = storeToRefs(useCopilotStore()); @@ -192,14 +192,8 @@ const callChatApi = async (userMessage: string) => { } if (message.data) { - // Find router reasoning from the latest router-status message in the conversation - const routerReasoning = messages.value - .slice() - .reverse() - .find(msg => msg.type === 'router-status' && msg.routerReasoning) - ?.routerReasoning; - - emit('update:data', message.id, message.data, routerReasoning); + // Pass the current conversation ID instead of extracting routerReasoning + emit('update:data', message.id, message.data, conversationId.value); selectedResultId.value = message.id; } scrollToEnd(); diff --git a/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue b/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue index 1eeae31c5..9bdf170ef 100644 --- a/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue +++ b/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue @@ -78,7 +78,7 @@ const props = defineProps<{ config: Config | null, isSnapshotModalOpen: boolean, chartErrorType?: ChartErrorType, - routerReasoning?: string + conversationId?: string }>() const isSnapshotModalOpen = computed({ @@ -122,7 +122,7 @@ const generateChart = async () => { isLoading.value = true; - const response = await copilotApiService.callChartApi(props.data, token.value, props.routerReasoning); + const response = await copilotApiService.callChartApi(props.data, token.value, props.conversationId); const data = await response.json(); if (data.config && data.success && data.dataMapping) { diff --git a/frontend/app/components/shared/modules/copilot/components/results/results-section.vue b/frontend/app/components/shared/modules/copilot/components/results/results-section.vue index 2e1161979..7c3bbfc2f 100644 --- a/frontend/app/components/shared/modules/copilot/components/results/results-section.vue +++ b/frontend/app/components/shared/modules/copilot/components/results/results-section.vue @@ -47,7 +47,7 @@ SPDX-License-Identifier: MIT :config="selectedResultConfig" :is-snapshot-modal-open="isSnapshotModalOpen" :chart-error-type="selectedResultChartErrorType" - :router-reasoning="selectedResultRouterReasoning" + :conversation-id="selectedResultConversationId" @update:config="handleConfigUpdate" @update:is-loading="handleChartLoading" @update:is-error="handleChartError" @@ -118,8 +118,8 @@ const selectedResultChartErrorType = computed(() => { return resultData.value.find(result => result.id === selectedResultId.value)?.chartErrorType; }) -const selectedResultRouterReasoning = 
computed(() => { - return resultData.value.find(result => result.id === selectedResultId.value)?.routerReasoning; +const selectedResultConversationId = computed(() => { + return resultData.value.find(result => result.id === selectedResultId.value)?.conversationId; }) const isEmpty = computed(() => { diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index d3a4a91eb..462e97cc4 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -65,13 +65,13 @@ class CopilotApiService { async callChartApi( sampleData: MessageData[], token: string, - routerReasoning?: string, + conversationId?: string, ): Promise { // Prepare the request body with the correct format const requestBody = { results: sampleData, userQuery: 'Generate a chart for this data', - routerReasoning, + conversationId, } // Send streaming request diff --git a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts index 3171c5995..bb5110306 100644 --- a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts +++ b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts @@ -15,7 +15,12 @@ export interface CopilotData { export type CopilotParams = Record; -export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id' | 'conversation-id'; +export type MessagePartType = 'router-status' | + 'sql-result' | + 'text' | + 'pipe-result' | + 'chat-response-id' | + 'conversation-id'; export type MessageStatus = 'analyzing' | 'complete' | 'error'; export type MessageData = Record; export type MessageRole = 'user' | 'assistant'; @@ -29,6 +34,7 @@ export interface ResultsHistory { chartConfig?: Config | null; chartErrorType?: ChartErrorType; routerReasoning?: string; + conversationId?: string; } export interface AIMessage { diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index f0093ee72..8248b56c0 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -68,7 +68,11 @@ export async function generateChartConfig( output: 'object' as const, schema: outputSchema, system: - 'You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema.', + `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. + Make sure the generated chart configuration answers the user's question and fits the data shape. + ### USER QUESTION + ${routerReasoning} + ### END USER QUESTION`, prompt: createChartGenerationPrompt(dataProfile, normalizedResults, userQuery), temperature: 0.1, }) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 9ee2b3f93..491710a6f 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -143,8 +143,8 @@ export async function streamingAgentRequestHandler({ } return } - // TODO: Remove this once we support text-to-sql + /* else if (routerOutput.next_action === 'create_query') { const fallbackMessage = `I'm unable to answer this question with the widgets I have access. 
But soon I will be able to construct my own queries for these questions if I have access to the necessary data.` @@ -181,6 +181,7 @@ export async function streamingAgentRequestHandler({ } return } + */ dataStream.writeData({ type: 'router-status', @@ -190,16 +191,10 @@ export async function streamingAgentRequestHandler({ }) const followUpTools: Record = {} + if (routerOutput.next_action === "create_query") { followUpTools['text_to_sql'] = tbTools['text_to_sql'] - } else { - for (const toolName of routerOutput.tools) { - if (tbTools[toolName]) { - followUpTools[toolName] = tbTools[toolName] - } - } - if (routerOutput.next_action === "create_query") { const textToSqlOutput = await runTextToSqlAgent({ model, messages, @@ -222,6 +217,10 @@ export async function streamingAgentRequestHandler({ }) } else if (routerOutput.next_action === 'pipes') { + for (const toolName of routerOutput.tools) { + if (tbTools[toolName]) { + followUpTools[toolName] = tbTools[toolName] + } const pipeOutput = await runPipeAgent({ model, messages, diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 487f472ba..9044aa7fe 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -1,18 +1,42 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT +import pg from 'pg' import { generateChartConfig, modifyChartConfig } from '../../../lib/chat/chart/generator' +import { ChatRepository } from '../../repo/chat.repo' import { Result, Config, DataMapping } from '../../../lib/chat/chart/types' import { PipeInstructions } from '~~/lib/chat/types' + export const maxDuration = 30 +// Helper function to get router reasoning from conversation +async function getRouterReasoningFromConversation(conversationId?: string): Promise { + if (!conversationId) return undefined + + try { + const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL, + }) + + const chatRepo = new ChatRepository(pool) + const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) + + await pool.end() + + return latestResponse?.routerReason || undefined + } catch (error) { + console.error('Error fetching router reasoning from conversation:', error) + return undefined + } +} + interface IChartRequestBody { results?: Result[] userQuery?: string currentConfig?: Config instructions?: string pipeInstructions?: PipeInstructions - routerReasoning?: string + conversationId?: string } interface ChartConfigResponse { @@ -25,9 +49,12 @@ interface ChartConfigResponse { export default defineEventHandler(async (event): Promise => { try { - const { results, userQuery, currentConfig, instructions, pipeInstructions, routerReasoning } = + const { results, userQuery, currentConfig, instructions, pipeInstructions, conversationId } = await readBody(event) + // Get router reasoning from conversation + const routerReasoning = await getRouterReasoningFromConversation(conversationId) + // If pipe instructions are provided, execute them first to get results if (pipeInstructions && !results) { const { executePipeInstructions } = await import('../../../lib/chat/instructions') diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index 0e60880fb..cd2866052 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -118,4 +118,16 @@ export class ChatRepository { const result = await this.pool.query(query, [chatResponseId]) return result.rows.length > 0 ? 
result.rows[0] : null } + + async getLatestChatResponseByConversation(conversationId: string): Promise { + const query = ` + SELECT * FROM chat_responses + WHERE conversation_id = $1 + ORDER BY created_at DESC + LIMIT 1 + ` + + const result = await this.pool.query(query, [conversationId]) + return result.rows.length > 0 ? result.rows[0] : null + } } From 640af08b39930b4a4add7e494b8b1c2c40bf1aae Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:17:29 +0200 Subject: [PATCH 03/21] chore: refactored data-copilot for readability, conversation history for all agents Signed-off-by: anilb --- ..._alterChatResponsesRouterResponseCheck.sql | 6 + .../copilot/store/copilot.api.service.ts | 2 +- frontend/lib/chat/agents/base-agent.ts | 36 +- frontend/lib/chat/agents/index.ts | 7 + frontend/lib/chat/agents/pipe.ts | 10 - frontend/lib/chat/agents/router.ts | 32 - frontend/lib/chat/agents/text-to-sql.ts | 9 - frontend/lib/chat/chart/analysis.ts | 29 +- frontend/lib/chat/chart/generator.ts | 65 +- frontend/lib/chat/data-copilot.ts | 735 ++++++++++++------ frontend/lib/chat/enums.ts | 19 + frontend/lib/chat/types.ts | 44 +- frontend/nuxt.config.ts | 6 + frontend/server/api/chat/feedback/[id].ts | 6 +- frontend/server/api/chat/stream.ts | 39 +- frontend/server/middleware/database.ts | 5 +- frontend/server/repo/chat.repo.ts | 3 +- frontend/server/repo/insightsProjects.repo.ts | 67 ++ frontend/server/utils/db.ts | 30 +- 19 files changed, 780 insertions(+), 370 deletions(-) create mode 100644 database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql create mode 100644 frontend/lib/chat/agents/index.ts create mode 100644 frontend/lib/chat/enums.ts create mode 100644 frontend/server/repo/insightsProjects.repo.ts diff --git a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql new file mode 100644 index 000000000..7c6044389 --- /dev/null +++ b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql @@ -0,0 +1,6 @@ +-- Drop the existing check constraint +ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; + +-- Add the new check constraint with 'create_query' instead of 'text-to-sql' +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IN ('pipes', 'create_query', 'stop')); \ No newline at end of file diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index 462e97cc4..f571d3773 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -40,7 +40,7 @@ class CopilotApiService { content: m.content })), pipe, - segmentId: project?.id, + projectSlug: project?.slug, projectName: project?.name, parameters, conversationId diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index e5830efe4..0e1be535c 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -14,10 +14,8 @@ export abstract class BaseAgent { abstract readonly temperature: number abstract readonly maxSteps: number - protected getConversationHistory( - input: { messages: ChatMessage[] } & T, - ): string { - const userMessages = input.messages.filter((m) => m.role === 'user') + protected getConversationHistory(messages: 
ChatMessage[]): string { + const userMessages = messages.filter((m) => m.role === 'user') if (userMessages.length > 1) { return JSON.stringify(userMessages.slice(0, -1), null, 2) } @@ -91,7 +89,7 @@ export abstract class BaseAgent { return 'value' } - async execute(input: TInput): Promise { + async execute(input: TInput & { messages: ChatMessage[] }): Promise { try { const systemPrompt = await this.getSystemPrompt(input) const userPrompt = this.getUserPrompt(input) @@ -99,7 +97,7 @@ export abstract class BaseAgent { // Append JSON format instructions to system prompt const jsonInstructions = this.generateJSONInstructions() - const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input) + const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input.messages) const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + jsonInstructions @@ -150,6 +148,7 @@ export abstract class BaseAgent { usage: response.usage, } } catch (error) { + // TODO:: Potentially a good place to catch zod validation errors and retryinh throw this.createError(error) } } @@ -189,10 +188,33 @@ export abstract class BaseAgent { } } + protected generateConversationHistoryReceipt(messages: ChatMessage[]): string { + try { + const conversationHistory = this.getConversationHistory(messages) + + if (!conversationHistory || conversationHistory.trim() === '') { + return '' + } + + return ` + + ## CONVERSATION HISTORY (FOR CONTEXT ONLY) + + The following is the conversation history leading up to the current question. \n\n + Use this ONLY for context and understanding. Do NOT attempt to answer previous questions. + + ${conversationHistory} + + ## END OF CONVERSATION HISTORY` + } catch (error) { + console.error('Error generating conversation history context', error) + return '' + } + } + protected abstract getModel(input: TInput): any protected abstract getSystemPrompt(input: TInput): string | Promise protected abstract getUserPrompt(input: TInput): string - protected abstract generateConversationHistoryReceipt(input: TInput): string protected abstract getTools(input: TInput): Record protected abstract createError(error: unknown): Error diff --git a/frontend/lib/chat/agents/index.ts b/frontend/lib/chat/agents/index.ts new file mode 100644 index 000000000..4091c8ac4 --- /dev/null +++ b/frontend/lib/chat/agents/index.ts @@ -0,0 +1,7 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. 
+// SPDX-License-Identifier: MIT +import { PipeAgent } from './pipe' +import { RouterAgent } from './router' +import { TextToSqlAgent } from './text-to-sql' + +export { PipeAgent, RouterAgent, TextToSqlAgent } diff --git a/frontend/lib/chat/agents/pipe.ts b/frontend/lib/chat/agents/pipe.ts index a2dcc8633..a0bb805fc 100644 --- a/frontend/lib/chat/agents/pipe.ts +++ b/frontend/lib/chat/agents/pipe.ts @@ -40,10 +40,6 @@ export class PipeAgent extends BaseAgent { return input.tools } - protected generateConversationHistoryReceipt(_input: PipeAgentInput): string { - return '' - } - protected createError(error: unknown): Error { if (error instanceof Error) { return new Error(`Pipe agent error: ${error.message}`) @@ -51,9 +47,3 @@ export class PipeAgent extends BaseAgent { return new Error(`Pipe agent error: ${String(error)}`) } } - -// Convenience function to maintain backward compatibility -export async function runPipeAgent(params: PipeAgentInput): Promise { - const agent = new PipeAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/agents/router.ts b/frontend/lib/chat/agents/router.ts index d04d0afdb..d7a7a01fc 100644 --- a/frontend/lib/chat/agents/router.ts +++ b/frontend/lib/chat/agents/router.ts @@ -1,8 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import type { RouterOutput, RouterAgentInput } from '../types' import { routerOutputSchema } from '../types' import { routerPrompt } from '../prompts/router' @@ -34,30 +32,6 @@ export class RouterAgent extends BaseAgent { return '' } - protected generateConversationHistoryReceipt(input: RouterAgentInput): string { - try { - const conversationHistory = this.getConversationHistory(input) - - if (!conversationHistory || conversationHistory.trim() === '') { - return '' - } - - return ` - - ## CONVERSATION HISTORY (FOR CONTEXT ONLY) - - The following is the conversation history leading up to the current question. \n\n - Use this ONLY for context and understanding. Do NOT attempt to answer previous questions. 
- - ${conversationHistory} - - ## END OF CONVERSATION HISTORY` - } catch (error) { - console.error('Error generating conversation history context', error) - return '' - } - } - protected getTools(input: RouterAgentInput): Record { // Only allow calling list_datasources; all other tools remain visible in prompt via toolsOverview const allowed: Record = {} @@ -74,9 +48,3 @@ export class RouterAgent extends BaseAgent { return new Error(`Router agent error: ${String(error)}`) } } - -// Convenience function to maintain backward compatibility -export async function runRouterAgent(params: RouterAgentInput): Promise { - const agent = new RouterAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index e3c02f463..9cce9b4a5 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -75,16 +75,7 @@ export class TextToSqlAgent extends BaseAgent { } } - protected generateConversationHistoryReceipt(_input: TextToSqlAgentInput): string { - return '' - } - protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { return true // Enable tool call monitoring for SQL agent } } - -export async function runTextToSqlAgent(params: TextToSqlAgentInput): Promise { - const agent = new TextToSqlAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/chart/analysis.ts b/frontend/lib/chat/chart/analysis.ts index 37050c299..6df2ebe5b 100644 --- a/frontend/lib/chat/chart/analysis.ts +++ b/frontend/lib/chat/chart/analysis.ts @@ -188,12 +188,25 @@ export function pivotLongToWide( function detectLeaderboardFromReasoning(routerReasoning?: string): boolean { if (!routerReasoning) return false - + const leaderboardKeywords = [ - 'leaderboard', 'ranking', 'rank', 'top', 'leading', 'highest', 'lowest', - 'leaders', 'ranked', 'ranking', 'position', 'standings', 'table', 'best', 'worst' + 'leaderboard', + 'ranking', + 'rank', + 'top', + 'leading', + 'highest', + 'lowest', + 'leaders', + 'ranked', + 'ranking', + 'position', + 'standings', + 'table', + 'best', + 'worst', ] - + const reasoningText = routerReasoning.toLowerCase() return leaderboardKeywords.some((keyword) => reasoningText.includes(keyword)) } @@ -220,7 +233,11 @@ function prioritizeMetricForLeaderboard(numericColumns: ColumnProfile[]): string return numericColumns[0]?.name || null } -function detectComparisonScenario(columns: ColumnProfile[], userQuestion: string, routerReasoning?: string) { +function detectComparisonScenario( + columns: ColumnProfile[], + userQuestion: string, + routerReasoning?: string, +) { const numericColumns = columns.filter((c) => c.type === 'numeric') const columnNames = columns.map((c) => c.name.toLowerCase()) @@ -240,7 +257,7 @@ function detectComparisonScenario(columns: ColumnProfile[], userQuestion: string type: 'leaderboard' as const, primaryColumns: [primaryMetric], secondaryColumns: secondaryMetrics, - } + }, } } } diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index 8248b56c0..1117af639 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -20,7 +20,11 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) -export type ChartConfig = { config: Config | null; dataMapping: DataMapping[] | null; isMetric?: boolean } +export type ChartConfig = { + config: Config | null + dataMapping: DataMapping[] | null + isMetric?: boolean +} // Color arrays for different 
chart types and data point counts const chartColors = { @@ -67,8 +71,7 @@ export async function generateChartConfig( model, output: 'object' as const, schema: outputSchema, - system: - `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. + system: `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. Make sure the generated chart configuration answers the user's question and fits the data shape. ### USER QUESTION ${routerReasoning} @@ -97,9 +100,9 @@ export async function generateChartConfig( chartConfig.yAxis.inverse = true if (chartConfig.grid) { - chartConfig.grid.left = "0.2%" + chartConfig.grid.left = '0.2%' } - chartConfig.series.map((s) => s.seriesLayoutBy = 'column') + chartConfig.series.map((s) => (s.seriesLayoutBy = 'column')) } // Apply default colors if not already set @@ -292,7 +295,7 @@ function generateFallbackConfig(profile: any): Config { }, ...(type !== 'pie' && { // For leaderboard, swap axes to create horizontal bars - xAxis: isLeaderboard + xAxis: isLeaderboard ? { type: 'value', name: yKeys.length === 1 ? yKeys[0] : 'Value', @@ -340,22 +343,33 @@ function generateFallbackConfig(profile: any): Config { axisTick: { show: false }, } : useDualAxis - ? [ - { - type: 'value', - name: primaryKeys.join(' / '), - position: 'left', - axisLabel: { - fontSize: 12, - fontWeight: 'normal', - color: lfxColors.neutral[400], - fontFamily: 'Inter', + ? [ + { + type: 'value', + name: primaryKeys.join(' / '), + position: 'left', + axisLabel: { + fontSize: 12, + fontWeight: 'normal', + color: lfxColors.neutral[400], + fontFamily: 'Inter', + }, }, - }, - { + { + type: 'value', + name: secondaryKeys.join(' / '), + position: 'right', + axisLabel: { + fontSize: 12, + fontWeight: 'normal', + color: lfxColors.neutral[400], + fontFamily: 'Inter', + }, + }, + ] + : { type: 'value', - name: secondaryKeys.join(' / '), - position: 'right', + name: yKeys.length === 1 ? yKeys[0] : 'Value', axisLabel: { fontSize: 12, fontWeight: 'normal', @@ -363,17 +377,6 @@ function generateFallbackConfig(profile: any): Config { fontFamily: 'Inter', }, }, - ] - : { - type: 'value', - name: yKeys.length === 1 ? yKeys[0] : 'Value', - axisLabel: { - fontSize: 12, - fontWeight: 'normal', - color: lfxColors.neutral[400], - fontFamily: 'Inter', - }, - }, grid: { left: '8%', right: useDualAxis ? '15%' : '8%', diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 491710a6f..03033230b 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,18 +1,29 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. 
-// SPDX-License-Identifier: MIT import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' -import { experimental_createMCPClient as createMCPClient, createDataStreamResponse } from 'ai' +import { + experimental_createMCPClient as createMCPClient, + createDataStreamResponse, + type LanguageModelV1, +} from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' +import type { Pool } from 'pg' import type { ChatResponse } from '../../server/repo/chat.repo' +import { ChatRepository } from '../../server/repo/chat.repo' -import { runRouterAgent } from './agents/router' -import { runTextToSqlAgent } from './agents/text-to-sql'; -import { runPipeAgent } from './agents/pipe' +import { TextToSqlAgent, PipeAgent, RouterAgent } from './agents' import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' -import type { ChatMessage } from './types' +import type { + DataCopilotQueryInput, + PipeAgentInput, + PipeAgentStreamInput, + PipeInstructions, + RouterAgentInput, + TextToSqlAgentInput, + TextToSqlAgentStreamInput, +} from './types' +import { RouterDecisionAction, StreamDataStatus, StreamDataType } from './enums' const bedrock = createAmazonBedrock({ accessKeyId: process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID, @@ -20,277 +31,503 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) -export async function streamingAgentRequestHandler({ - messages, - segmentId, - projectName, - pipe, - parameters, - conversationId, - onResponseComplete, -}: { - messages: ChatMessage[] - segmentId?: string - projectName?: string - pipe: string - parameters?: Record - conversationId?: string - onResponseComplete?: (response: ChatResponse) => Promise -}): Promise { - const url = new URL( - `https://mcp.tinybird.co?token=${process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN}&host=${process.env.NUXT_TINYBIRD_BASE_URL}`, - ) - - const mcpClient = await createMCPClient({ - transport: new StreamableHTTPClientTransport(url, { - sessionId: `session_${Date.now()}`, - }), - }) - - const MODEL = 'us.anthropic.claude-sonnet-4-20250514-v1:0' - - const tbTools = await mcpClient.tools({}) - // Build a human-readable overview of all tools for the router's prompt (read-only catalog) - const excludedFromOverview = new Set([ - 'explore_data', - 'execute_query', - 'text_to_sql', - 'list_endpoints', - 'list_service_datasources', - ]) - const toolsOverview = Object.entries(tbTools) - .filter(([name]) => !excludedFromOverview.has(name)) - .map(([name, def]: [string, any]) => { - try { - const description = def?.description || def?.meta?.description || '' - const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined - const params = inputSchema ? JSON.stringify(inputSchema, null, 2) : undefined - return [`- ${name}: ${description}`, params ? 
` params: ${params}` : undefined] - .filter(Boolean) - .join('\n') - } catch { - return `- ${name}` - } - }) - .join('\n') - const parametersString = JSON.stringify(parameters || {}) - const dateString = new Date().toISOString().split('T')[0] - const model = bedrock(MODEL) - - return createDataStreamResponse({ - execute: async (dataStream) => { - const responseData = { - question: messages[messages.length - 1]?.content || '', - answer: '', - reasoning: '', - explanation: '', - data: null as any, - inputTokens: 0, - outputTokens: 0, - } +export class DataCopilot { + /** MCP client for communicating with Tinybird services */ + private mcpClient: any - try { - dataStream.writeData({ - type: 'router-status', - status: 'analyzing', - }) - - const routerOutput = await runRouterAgent({ - model, - messages, - tools: tbTools, - toolsOverview, - date: dateString as string, - projectName: projectName as string, - pipe, - parametersString, - segmentId: segmentId as string, - }) - // Accumulate token usage from router - if (routerOutput.usage) { - responseData.inputTokens += routerOutput.usage.promptTokens || 0 - responseData.outputTokens += routerOutput.usage.completionTokens || 0 - } + /** Available Tinybird tools loaded from MCP server */ + private tbTools: Record = {} - if (routerOutput.next_action === 'stop') { - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` - responseData.answer = routerOutput.reasoning - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: routerOutput.reasoning, - }) + /** Human-readable overview of tools for router agent decision making */ + private toolsOverview: string = '' - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'stop', - routerReason: routerOutput.reasoning, - pipeInstructions: undefined, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + /** Tinybird MCP server URL */ + private tbMcpUrl: string = '' - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', - }) - } - return - } - // TODO: Remove this once we support text-to-sql - /* - else if (routerOutput.next_action === 'create_query') { - const fallbackMessage = `I'm unable to answer this question with the widgets I have access. 
- But soon I will be able to construct my own queries for these questions if I have access to the necessary data.` - - responseData.answer = fallbackMessage - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\n - Reasoning: ${routerOutput.reasoning}\n - Fallback: Text-to-SQL not yet supported` - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: fallbackMessage, - }) + /** Amazon Bedrock language model instance */ + private model: LanguageModelV1 - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'text-to-sql', - routerReason: routerOutput.reasoning, - pipeInstructions: undefined, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + /** Bedrock model identifier */ + private readonly BEDROCK_MODEL_ID = 'us.anthropic.claude-sonnet-4-20250514-v1:0' - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', - }) - } - return + constructor() { + this.model = bedrock(this.BEDROCK_MODEL_ID) + this.tbMcpUrl = `https://mcp.tinybird.co?token=${process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN}&host=${process.env.NUXT_TINYBIRD_BASE_URL}` + } + + /** + * Initialize MCP client connection and load Tinybird tools + */ + async initialize(): Promise { + const url = new URL(this.tbMcpUrl) + + this.mcpClient = await createMCPClient({ + transport: new StreamableHTTPClientTransport(url, { + sessionId: `session_${Date.now()}`, + }), + }) + + this.tbTools = await this.mcpClient.tools({}) + this.buildToolsOverview() + } + + /** + * Build human-readable overview of available tools for the router agent + */ + private buildToolsOverview(): void { + const excludedFromOverview = new Set([ + 'explore_data', + 'execute_query', + 'text_to_sql', + 'list_endpoints', + 'list_service_datasources', + ]) + + this.toolsOverview = Object.entries(this.tbTools) + .filter(([name]) => !excludedFromOverview.has(name)) + .map(([name, def]: [string, any]) => { + try { + const description = def?.description || def?.meta?.description || '' + const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined + const params = inputSchema ? JSON.stringify(inputSchema, null, 2) : undefined + return [`- ${name}: ${description}`, params ? ` params: ${params}` : undefined] + .filter(Boolean) + .join('\n') + } catch { + return `- ${name}` } - */ + }) + .join('\n') + } - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: routerOutput.reasoning, - reformulatedQuestion: routerOutput.reformulated_question, - }) + /** + * Save chat response to database + */ + private async saveChatResponse( + response: ChatResponse, + insightsDbPool: Pool, + userEmail: string, + ): Promise { + const chatRepo = new ChatRepository(insightsDbPool) + return await chatRepo.saveChatResponse(response, userEmail) + } - const followUpTools: Record = {} + /** + * Executes the router agent to analyze user queries and determine the optimal processing strategy. + * The router acts as the decision-making component that routes requests to either SQL generation + * or data pipeline processing based on query complexity and intent. 
+ * + * @param messages - User conversation history providing context for the query + * @param date - Current date string for time-based query filtering + * @param projectName - Project identifier for data scoping and access control + * @param pipe - Main data endpoint or pipeline identifier + * @param parametersString - Additional query parameters serialized as JSON + * @param segmentId - Data segment filter for multi-tenant data access + * @returns Router decision with next action, reasoning, and selected tools + */ + private async runRouterAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + }: Omit) { + const agent = new RouterAgent() + return agent.execute({ + model: this.model, + messages, + tools: this.tbTools, + toolsOverview: this.toolsOverview, + date, + projectName, + pipe, + parametersString, + segmentId, + }) + } - if (routerOutput.next_action === "create_query") { - followUpTools['text_to_sql'] = tbTools['text_to_sql'] + /** + * Executes the text-to-SQL agent to convert natural language questions into executable SQL queries. + * This agent understands database schemas, applies proper filtering, and generates optimized queries + * for direct data access when users need raw data rather than processed analytics. + * + * @param messages - Original conversation context for understanding query intent + * @param date - Current date for constructing time-based WHERE conditions + * @param projectName - Project context for database table scoping + * @param pipe - Data source identifier for table selection + * @param parametersString - Additional query parameters for filtering + * @param segmentId - Segment identifier for multi-tenant data filtering + * @param reformulatedQuestion - Clarified question from router agent for better SQL generation + * @returns SQL query string with explanation and token usage metrics + */ + private async runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }: TextToSqlAgentInput) { + const followUpTools: Record = {} + followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] + followUpTools['list_datasources'] = this.tbTools['list_datasources'] - const textToSqlOutput = await runTextToSqlAgent({ - model, - messages, - tools: followUpTools, - date: dateString as string, - projectName: projectName as string, - pipe, - parametersString, - segmentId: segmentId as string, - reformulatedQuestion: routerOutput.reformulated_question, - }) - // Execute the SQL query according to the instructions - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - + const agent = new TextToSqlAgent() + return agent.execute({ + model: this.model, + messages, + tools: followUpTools, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + } + + /** + * Executes the pipe agent to generate tinybird pipeline instructions. + * This agent designs multi-step workflows that use one or more tinybird pipes. + * Each pipe is used to answer specific parts of the user's analytical question. 
+ * + * @param messages - Original conversation context for understanding analytical requirements + * @param date - Current date for time-based data filtering in pipeline steps + * @param projectName - Project identifier for data access and pipeline scoping + * @param pipe - Primary pipeline identifier for data source selection + * @param parametersString - Additional processing parameters for pipeline configuration + * @param segmentId - Segment filter for multi-tenant pipeline execution + * @param reformulatedQuestion - Refined analytical question from router agent + * @param toolNames - Selected Tinybird tools for pipeline construction (e.g., aggregation, transformation tools) + * @returns Pipeline instructions with processing steps, column definitions, and explanation + */ + private async runPipeAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + }: Omit) { + const followUpTools: Record = {} + for (const toolName of toolNames) { + if (this.tbTools[toolName]) { + followUpTools[toolName] = this.tbTools[toolName] + } + } + const agent = new PipeAgent() + return agent.execute({ + model: this.model, + messages, + tools: followUpTools, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + }) + } + + /** + * Main streaming handler that orchestrates the entire AI agent workflow + */ + async streamingAgentRequestHandler({ + messages, + segmentId, + projectName, + pipe, + parameters, + conversationId, + insightsDbPool, + userEmail, + }: DataCopilotQueryInput): Promise { + const parametersString = JSON.stringify(parameters || {}) + const date = new Date().toISOString().slice(0, 10) + + return createDataStreamResponse({ + execute: async (dataStream) => { + const responseData: ChatResponse = { + userPrompt: messages[messages.length - 1]?.content || '', + inputTokens: 0, + outputTokens: 0, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId || '', + routerResponse: RouterDecisionAction.STOP, + routerReason: '', + pipeInstructions: undefined as PipeInstructions | undefined, + sqlQuery: undefined as string | undefined, + } + + try { dataStream.writeData({ - type: "sql-result", - explanation: textToSqlOutput.explanation, - instructions: textToSqlOutput.instructions, - data: queryData + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.ANALYZING, }) - } else if (routerOutput.next_action === 'pipes') { - for (const toolName of routerOutput.tools) { - if (tbTools[toolName]) { - followUpTools[toolName] = tbTools[toolName] - } - const pipeOutput = await runPipeAgent({ - model, + const routerOutput = await this.runRouterAgent({ messages, - tools: followUpTools, - date: dateString as string, + date, projectName: projectName as string, pipe, parametersString, segmentId: segmentId as string, - reformulatedQuestion: routerOutput.reformulated_question, - toolNames: routerOutput.tools, }) - // Accumulate token usage from pipe agent - if (pipeOutput.usage) { - responseData.inputTokens += pipeOutput.usage.promptTokens || 0 - responseData.outputTokens += pipeOutput.usage.completionTokens || 0 + // Accumulate token usage from router + if (routerOutput.usage) { + responseData.inputTokens += routerOutput.usage.promptTokens || 0 + responseData.outputTokens += routerOutput.usage.completionTokens || 0 } - // Execute the pipes according to the instructions and combine results - const combinedData = await executePipeInstructions(pipeOutput.instructions) - - responseData.explanation 
= pipeOutput.explanation - responseData.answer = pipeOutput.explanation - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\n - Router Reasoning: ${routerOutput.reasoning}\n - Tools Selected: ${routerOutput.tools ? routerOutput.tools.join(', ') : 'none'}\n - Reformulated Question: ${routerOutput.reformulated_question}\n - Pipe Agent Explanation: ${pipeOutput.explanation}` - - responseData.data = combinedData + if (routerOutput.next_action === RouterDecisionAction.STOP) { + await this.handleStopAction( + messages[messages.length - 1]?.content || '', + routerOutput, + responseData, + dataStream, + insightsDbPool, + userEmail, + conversationId, + ) + return + } dataStream.writeData({ - type: 'pipe-result', - explanation: pipeOutput.explanation, - instructions: pipeOutput.instructions, - data: combinedData, + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.COMPLETE, + reasoning: routerOutput.reasoning, + reformulatedQuestion: routerOutput.reformulated_question, }) - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'pipes', - routerReason: routerOutput.reasoning, - pipeInstructions: pipeOutput.instructions, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + let sqlQuery: string | undefined = undefined + let pipeInstructions: PipeInstructions | undefined = undefined - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', + if (routerOutput.next_action === RouterDecisionAction.CREATE_QUERY) { + const result = await this.handleCreateQueryAction({ + messages, + date, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + dataStream, + }) + sqlQuery = result.sqlQuery + } else if (routerOutput.next_action === RouterDecisionAction.PIPES) { + const result = await this.handlePipesAction({ + messages, + date, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + toolNames: routerOutput.tools, + dataStream, + responseData, + routerOutput, }) + pipeInstructions = result.pipeInstructions } + + await this.handleResponseComplete({ + userPrompt: messages[messages.length - 1]?.content || '', + responseData, + routerOutput, + pipeInstructions, + sqlQuery, + conversationId, + insightsDbPool, + userEmail, + dataStream, + }) + } catch (error) { + dataStream.writeData({ + type: 'router-status', + status: 'error', + error: error instanceof Error ? error.message : 'An error occurred', + }) + throw error } - } - } catch (error) { - dataStream.writeData({ - type: 'router-status', - status: 'error', - error: error instanceof Error ? 
error.message : 'An error occurred', - }) - - throw error - } - }, - }) + }, + }) + } + + /** + * Handle router 'stop' action - send final response without further processing + */ + private async handleStopAction( + userPrompt: string, + routerOutput: any, + responseData: any, + dataStream: any, + insightsDbPool: Pool, + userEmail: string, + conversationId?: string, + ): Promise { + responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` + responseData.answer = routerOutput.reasoning + + dataStream.writeData({ + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.COMPLETE, + reasoning: routerOutput.reasoning, + }) + + const chatResponseId = await this.saveChatResponse( + { + userPrompt, + inputTokens: responseData.inputTokens, + outputTokens: responseData.outputTokens, + routerResponse: RouterDecisionAction.STOP, + routerReason: routerOutput.reasoning, + pipeInstructions: undefined, + sqlQuery: undefined, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId, + }, + insightsDbPool, + userEmail, + ) + + dataStream.writeData({ + type: StreamDataType.CHAT_RESPONSE_ID, + id: chatResponseId, + conversationId: conversationId || '', + }) + } + + /** + * Handle router 'create_query' action - generate and execute SQL query + */ + private async handleCreateQueryAction({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + dataStream, + }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { + const textToSqlOutput = await this.runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.SQL_RESULT, + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData, + }) + + return { sqlQuery: textToSqlOutput.instructions } + } + + /** + * Handle router 'pipes' action - generate and execute pipe instructions + */ + private async handlePipesAction({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + dataStream, + responseData, + }: PipeAgentStreamInput): Promise<{ pipeInstructions: PipeInstructions }> { + const pipeOutput = await this.runPipeAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion, + toolNames, + }) + + // Accumulate token usage from pipe agent + if (pipeOutput.usage) { + responseData.inputTokens += pipeOutput.usage.promptTokens || 0 + responseData.outputTokens += pipeOutput.usage.completionTokens || 0 + } + + // Execute the pipes according to the instructions and combine results + const combinedData = await executePipeInstructions(pipeOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.PIPE_RESULT, + explanation: pipeOutput.explanation, + instructions: pipeOutput.instructions, + data: combinedData, + }) + + return { pipeInstructions: pipeOutput.instructions } + } + + /** + * Save final response to database and stream chat response ID + */ + private async handleResponseComplete({ + userPrompt, + responseData, + routerOutput, + pipeInstructions, + sqlQuery, + conversationId, + insightsDbPool, + userEmail, + dataStream, + }: { + userPrompt: string + responseData: ChatResponse + routerOutput: any + pipeInstructions?: PipeInstructions + sqlQuery?: string + 
conversationId?: string + insightsDbPool: Pool + userEmail: string + dataStream: any + }): Promise { + const chatResponseId = await this.saveChatResponse( + { + userPrompt, + inputTokens: responseData.inputTokens, + outputTokens: responseData.outputTokens, + routerResponse: routerOutput.next_action, + routerReason: routerOutput.reasoning, + pipeInstructions, + sqlQuery, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId, + }, + insightsDbPool, + userEmail, + ) + + dataStream.writeData({ + type: StreamDataType.CHAT_RESPONSE_ID, + id: chatResponseId, + conversationId: conversationId || '', + }) + } } diff --git a/frontend/lib/chat/enums.ts b/frontend/lib/chat/enums.ts new file mode 100644 index 000000000..32192e973 --- /dev/null +++ b/frontend/lib/chat/enums.ts @@ -0,0 +1,19 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. +// SPDX-License-Identifier: MIT +export enum StreamDataType { + ROUTER_STATUS = 'router-status', + CHAT_RESPONSE_ID = 'chat-response-id', + SQL_RESULT = 'sql-result', + PIPE_RESULT = 'pipe-result', +} + +export enum StreamDataStatus { + ANALYZING = 'analyzing', + COMPLETE = 'complete', +} + +export enum RouterDecisionAction { + STOP = 'stop', + CREATE_QUERY = 'create_query', + PIPES = 'pipes', +} diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index 557108969..5f155e1f6 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -1,7 +1,10 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ +import type { DataStreamWriter } from 'ai' +import type { Pool } from 'pg' import { z } from 'zod' +import type { ChatResponse } from '~~/server/repo/chat.repo' // ============================================ // Pipe Instruction Types @@ -109,7 +112,7 @@ export type PipeOutput = z.infer & { usage?: any } // ============================================ export interface ChatMessage { - content: string, + content: string role: string } @@ -125,6 +128,13 @@ export interface RouterAgentInput { segmentId: string | null } +export interface PipeAgentStreamInput extends Omit { + dataStream: DataStreamWriter + date: string + responseData: ChatResponse + routerOutput: RouterOutput +} + export interface PipeAgentInput { model: any // Bedrock model instance messages: ChatMessage[] @@ -137,3 +147,35 @@ export interface PipeAgentInput { reformulatedQuestion: string toolNames: string[] // Array of tool names from router } + +export interface DataCopilotQueryInput { + messages: ChatMessage[] + segmentId?: string + projectName?: string + pipe: string + parameters?: Record + conversationId?: string + insightsDbPool: Pool + userEmail: string +} + +export interface TextToSqlAgentInput { + messages: ChatMessage[] + date: string + projectName: string + pipe: string + parametersString: string + segmentId: string + reformulatedQuestion: string +} + +export interface TextToSqlAgentStreamInput { + messages: ChatMessage[] + date: string + projectName: string + pipe: string + parametersString: string + segmentId: string + reformulatedQuestion: string + dataStream: any +} diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index 9801729f5..51d22f51a 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -54,6 +54,12 @@ export default defineNuxtConfig({ insightsDbUsername: "postgres", insightsDbPassword: "example", insightsDbDatabase: "insights", + cmDbWriteHost: "localhost", + cmDbReadHost: "localhost", + cmDbPort: 
5432, + cmDbUsername: "postgres", + cmDbPassword: "example", + cmDbDatabase: "crowd-web", // These are also exposed on the client-side public: { apiBase: "/api", diff --git a/frontend/server/api/chat/feedback/[id].ts b/frontend/server/api/chat/feedback/[id].ts index 293032180..5a0a00d1c 100644 --- a/frontend/server/api/chat/feedback/[id].ts +++ b/frontend/server/api/chat/feedback/[id].ts @@ -29,12 +29,12 @@ export default defineEventHandler(async (event): Promise @@ -18,31 +18,44 @@ interface IStreamRequestBody { export default defineEventHandler(async (event): Promise => { try { - const { messages, segmentId, projectName, pipe, parameters, conversationId } = + const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) if (!pipe) { return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) } + if (!projectSlug) { + return createError({ statusCode: 400, statusMessage: 'Project slug is required' }) + } + // Generate conversationId if not provided const finalConversationId = conversationId || crypto.randomUUID() - const dbPool = event.context.dbPool as Pool + const insightsDbPool = event.context.insightsDbPool as Pool + const cmDbPool = event.context.cmDbPool as Pool + + // find project by slug to get the segmentId + const insightsProjectsRepo = new InsightsProjectsRepository(cmDbPool) + + const insightsProjects = await insightsProjectsRepo.findInsightsProjectsBySlug(projectSlug) + + if (!insightsProjects) { + return createError({ statusCode: 404, statusMessage: 'Project not found' }) + } + + const dataCopilot = new DataCopilot() + await dataCopilot.initialize() - return await streamingAgentRequestHandler({ + return dataCopilot.streamingAgentRequestHandler({ messages, - segmentId, + segmentId: insightsProjects.segmentId, projectName, pipe, parameters, conversationId: finalConversationId, - onResponseComplete: dbPool - ? async (response) => { - const chatRepo = new ChatRepository(dbPool) - return await chatRepo.saveChatResponse(response, event.context.user.email) - } - : undefined, + insightsDbPool, + userEmail: event.context.user.email, }) } catch (error) { return createError({ diff --git a/frontend/server/middleware/database.ts b/frontend/server/middleware/database.ts index 45b5fda64..6f4dfcb6e 100644 --- a/frontend/server/middleware/database.ts +++ b/frontend/server/middleware/database.ts @@ -1,11 +1,12 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT -import { getDbPool } from '../utils/db'; +import { getInsightsDbPool, getCMDbPool } from '../utils/db'; export default defineEventHandler(async (event) => { // Only apply to chat endpoints if (event.node.req.url?.startsWith('/api/chat/')) { // Add the database pool to the event context - event.context.dbPool = getDbPool(); + event.context.insightsDbPool = getInsightsDbPool(); + event.context.cmDbPool = getCMDbPool(); } }); \ No newline at end of file diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index cd2866052..da56fe989 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -1,6 +1,7 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT import type { Pool } from 'pg' +import { RouterDecisionAction } from '~~/lib/chat/enums' export interface PipeInput { endDate?: string @@ -45,7 +46,7 @@ export interface ChatResponse { id?: string conversationId?: string userPrompt: string - routerResponse: 'pipes' | 'text-to-sql' | 'stop' + routerResponse: RouterDecisionAction routerReason: string pipeInstructions?: PipeInstructions sqlQuery?: string diff --git a/frontend/server/repo/insightsProjects.repo.ts b/frontend/server/repo/insightsProjects.repo.ts new file mode 100644 index 000000000..986cde312 --- /dev/null +++ b/frontend/server/repo/insightsProjects.repo.ts @@ -0,0 +1,67 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. +// SPDX-License-Identifier: MIT +import type { Pool } from 'pg' + +export interface InsightsProject { + id: string + name: string + description?: string + segmentId: string + createdAt: string + updatedAt: string + logoUrl?: string + organizationId: string + website?: string + github?: string + linkedin?: string + twitter?: string + widgets: string[] + repositories: string[] + slug: string + enabled: boolean + isLF: boolean + keywords: string[] + searchKeywords: string[] + deletedAt?: string +} + +export class InsightsProjectsRepository { + constructor(private pool: Pool) {} + + async findInsightsProjectsBySlug(slug: string): Promise { + try { + const query = ` + SELECT + id, + name, + description, + "segmentId", + "createdAt", + "updatedAt", + "logoUrl", + "organizationId", + website, + github, + linkedin, + twitter, + widgets, + repositories, + slug, + enabled, + "isLF", + keywords, + "searchKeywords", + "deletedAt" + FROM "insightsProjects" + WHERE slug = $1 AND "deletedAt" IS NULL + ` + + const result = await this.pool.query(query, [slug]) + return result.rows.length > 0 ? result.rows[0] : null + } catch (error) { + console.error('Error finding insights project by slug:', error) + throw new Error('Could not find insights project') + } + } + +} diff --git a/frontend/server/utils/db.ts b/frontend/server/utils/db.ts index df67c46eb..e129c04e3 100644 --- a/frontend/server/utils/db.ts +++ b/frontend/server/utils/db.ts @@ -3,12 +3,13 @@ import { Pool } from 'pg'; import { isLocal } from './common'; -let pool: Pool | null = null; +let insightsDbPool: Pool | null = null; +let cmDbPool: Pool | null = null; -export function getDbPool(): Pool { +export function getInsightsDbPool(): Pool { const config = useRuntimeConfig() - if (!pool) { - pool = new Pool({ + if (!insightsDbPool) { + insightsDbPool = new Pool({ host: config.insightsDbWriteHost, port: config.insightsDbPort, database: config.insightsDbDatabase, @@ -20,5 +21,24 @@ export function getDbPool(): Pool { ssl: isLocal ? false : { rejectUnauthorized: false }, }); } - return pool; + return insightsDbPool; +} + + +export function getCMDbPool(): Pool { + const config = useRuntimeConfig() + if (!cmDbPool) { + cmDbPool = new Pool({ + host: config.cmDbWriteHost, + port: config.cmDbPort, + database: config.cmDbDatabase, + user: config.cmDbUsername, + password: config.cmDbPassword, + max: 20, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, + ssl: isLocal ? 
false : { rejectUnauthorized: false }, + }); + } + return cmDbPool; } \ No newline at end of file From 80a9198136ae216a8f327861f1e57fd0c4622511 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:43:09 +0200 Subject: [PATCH 04/21] chore: migrate script can point to host network now using a param for running remote migs Signed-off-by: anilb --- database/migrate.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/database/migrate.sh b/database/migrate.sh index 1b577fb58..7f1da72ac 100755 --- a/database/migrate.sh +++ b/database/migrate.sh @@ -2,7 +2,14 @@ set -ex set +o history -# Grab all command line arguments to pass them into Docker, or default to "migrate". +# Check if first argument is --host-network +DOCKER_NETWORK="" +if [ "$1" = "--host-network" ]; then + DOCKER_NETWORK="--network host" + shift # Remove --host-network from arguments +fi + +# Grab remaining command line arguments to pass them into Docker, or default to "migrate". if [ $# -eq 0 ]; then FLYWAY_COMMAND=("migrate") else @@ -11,7 +18,7 @@ fi echo "Running Flyway command: ${FLYWAY_COMMAND[@]} on jdbc:postgresql://${PGHOST}:${PGPORT}/${PGDATABASE}" -docker run --rm \ +docker run --rm ${DOCKER_NETWORK} \ -v "$(pwd)/migrations:/tmp/migrations" \ flyway/flyway:latest-alpine \ -locations="filesystem:/tmp/migrations" \ From 689508985e4980a33b4ac1e684bacb22c969e6d9 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:43:27 +0200 Subject: [PATCH 05/21] chore: check alter migration updates old enum keys before adding the new one Signed-off-by: anilb --- .../V1756973487__alterChatResponsesRouterResponseCheck.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql index 7c6044389..f9d4d2785 100644 --- a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql +++ b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql @@ -1,6 +1,9 @@ -- Drop the existing check constraint ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; +UPDATE chat_responses SET router_response = 'create_query' +WHERE router_response = 'text-to-sql'; + -- Add the new check constraint with 'create_query' instead of 'text-to-sql' ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check CHECK (router_response IN ('pipes', 'create_query', 'stop')); \ No newline at end of file From f4d1cdbdfd85e7ee0e01aa943e974b5d06a9e09b Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 11:44:41 +0200 Subject: [PATCH 06/21] chore: pass pg pool properly Signed-off-by: anilb --- frontend/server/api/chat/chart.ts | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 9044aa7fe..22fa0ec0e 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -1,28 +1,26 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT -import pg from 'pg' +import { Pool } from 'pg' import { generateChartConfig, modifyChartConfig } from '../../../lib/chat/chart/generator' import { ChatRepository } from '../../repo/chat.repo' import { Result, Config, DataMapping } from '../../../lib/chat/chart/types' import { PipeInstructions } from '~~/lib/chat/types' - export const maxDuration = 30 // Helper function to get router reasoning from conversation -async function getRouterReasoningFromConversation(conversationId?: string): Promise { +async function getRouterReasoningFromConversation( + pool: Pool, + conversationId?: string, +): Promise { if (!conversationId) return undefined - + try { - const pool = new pg.Pool({ - connectionString: process.env.DATABASE_URL, - }) - const chatRepo = new ChatRepository(pool) const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) - + await pool.end() - + return latestResponse?.routerReason || undefined } catch (error) { console.error('Error fetching router reasoning from conversation:', error) @@ -53,7 +51,10 @@ export default defineEventHandler(async (event): Promise(event) // Get router reasoning from conversation - const routerReasoning = await getRouterReasoningFromConversation(conversationId) + const routerReasoning = await getRouterReasoningFromConversation( + event.context.insightsDbPool as Pool, + conversationId, + ) // If pipe instructions are provided, execute them first to get results if (pipeInstructions && !results) { @@ -69,7 +70,11 @@ export default defineEventHandler(async (event): Promise Date: Thu, 11 Sep 2025 11:59:51 +0200 Subject: [PATCH 07/21] chore: remove premature pool.end call Signed-off-by: anilb --- frontend/server/api/chat/chart.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 22fa0ec0e..5103e1aae 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -19,8 +19,6 @@ async function getRouterReasoningFromConversation( const chatRepo = new ChatRepository(pool) const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) - await pool.end() - return latestResponse?.routerReason || undefined } catch (error) { console.error('Error fetching router reasoning from conversation:', error) From ee7b2460819d8e8f27a2b3ce9c9ed22b6486e99a Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 18:07:19 +0200 Subject: [PATCH 08/21] feat: optional pipe source for agents, better overall types Signed-off-by: anilb --- frontend/lib/chat/chart/analysis.ts | 2 - frontend/lib/chat/chart/base-config.ts | 1 - frontend/lib/chat/chart/generator.ts | 6 --- frontend/lib/chat/data-copilot.ts | 49 +++++++++++++----------- frontend/lib/chat/prompts/pipe.ts | 14 +++++-- frontend/lib/chat/prompts/router.ts | 16 +++++--- frontend/lib/chat/prompts/text-to-sql.ts | 10 ++++- frontend/lib/chat/types.ts | 19 ++++++++- 8 files changed, 74 insertions(+), 43 deletions(-) diff --git a/frontend/lib/chat/chart/analysis.ts b/frontend/lib/chat/chart/analysis.ts index 6df2ebe5b..98052b81a 100644 --- a/frontend/lib/chat/chart/analysis.ts +++ b/frontend/lib/chat/chart/analysis.ts @@ -2,8 +2,6 @@ // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-unused-vars */ -// Copyright (c) 2025 The Linux Foundation and each contributor. 
-// SPDX-License-Identifier: MIT import { DateTime } from 'luxon' import type { Result } from './types' interface RecommendedVisualization { diff --git a/frontend/lib/chat/chart/base-config.ts b/frontend/lib/chat/chart/base-config.ts index 90bad7721..85b4fedaa 100644 --- a/frontend/lib/chat/chart/base-config.ts +++ b/frontend/lib/chat/chart/base-config.ts @@ -1,6 +1,5 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT - import type { Config } from './types' import { lfxColors } from '~/config/styles/colors' diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index 1117af639..48bd63ae4 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -4,8 +4,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable max-len */ /* eslint-disable vue/max-len */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { generateObject } from 'ai' import { outputSchema } from './types' @@ -46,10 +44,6 @@ const chartColors = { lines: [lfxColors.positive[500], lfxColors.negative[500], lfxColors.brand[300]], }, } - -// Legacy defaultColors for backward compatibility (if needed elsewhere) -// const defaultColors = chartColors.single; - const model = bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0') export async function generateChartConfig( diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 03033230b..7fc36364a 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,11 +1,11 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT -/* eslint-disable @typescript-eslint/no-explicit-any */ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, createDataStreamResponse, type LanguageModelV1, + type DataStreamWriter, } from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' import type { Pool } from 'pg' @@ -15,11 +15,13 @@ import { ChatRepository } from '../../server/repo/chat.repo' import { TextToSqlAgent, PipeAgent, RouterAgent } from './agents' import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' import type { + AgentResponseCompleteParams, DataCopilotQueryInput, PipeAgentInput, PipeAgentStreamInput, PipeInstructions, RouterAgentInput, + RouterOutput, TextToSqlAgentInput, TextToSqlAgentStreamInput, } from './types' @@ -31,12 +33,26 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) +type MCPClient = Awaited> + +type TbTools = Record< + string, + { + description?: string + meta?: { description?: string } + inputSchema?: unknown + parameters?: unknown + schema?: unknown + [key: string]: unknown // Allow additional properties + } +> + export class DataCopilot { /** MCP client for communicating with Tinybird services */ - private mcpClient: any + private mcpClient!: MCPClient /** Available Tinybird tools loaded from MCP server */ - private tbTools: Record = {} + private tbTools: TbTools = {} /** Human-readable overview of tools for router agent decision making */ private toolsOverview: string = '' @@ -85,7 +101,7 @@ export class DataCopilot { this.toolsOverview = Object.entries(this.tbTools) .filter(([name]) => !excludedFromOverview.has(name)) - .map(([name, def]: [string, any]) => { + .map(([name, def]: [string, TbTools[string]]) => { try { const description = def?.description || def?.meta?.description || '' const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined @@ -170,7 +186,7 @@ export class DataCopilot { segmentId, reformulatedQuestion, }: TextToSqlAgentInput) { - const followUpTools: Record = {} + const followUpTools: Record = {} followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] followUpTools['list_datasources'] = this.tbTools['list_datasources'] @@ -213,7 +229,7 @@ export class DataCopilot { reformulatedQuestion, toolNames, }: Omit) { - const followUpTools: Record = {} + const followUpTools: Record = {} for (const toolName of toolNames) { if (this.tbTools[toolName]) { followUpTools[toolName] = this.tbTools[toolName] @@ -365,16 +381,13 @@ export class DataCopilot { */ private async handleStopAction( userPrompt: string, - routerOutput: any, - responseData: any, - dataStream: any, + routerOutput: RouterOutput, + responseData: ChatResponse, + dataStream: DataStreamWriter, insightsDbPool: Pool, userEmail: string, conversationId?: string, ): Promise { - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` - responseData.answer = routerOutput.reasoning - dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, @@ -497,17 +510,7 @@ export class DataCopilot { insightsDbPool, userEmail, dataStream, - }: { - userPrompt: string - responseData: ChatResponse - routerOutput: any - pipeInstructions?: PipeInstructions - sqlQuery?: string - conversationId?: string - insightsDbPool: Pool - userEmail: string - dataStream: any - }): Promise { + }: AgentResponseCompleteParams): Promise { 
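    // Persist the completed response, then stream the chat response ID together with the
    // conversationId so the client can reuse the same conversation for follow-up messages.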
const chatResponseId = await this.saveChatResponse( { userPrompt, diff --git a/frontend/lib/chat/prompts/pipe.ts b/frontend/lib/chat/prompts/pipe.ts index eebcb49dd..906ac99c8 100644 --- a/frontend/lib/chat/prompts/pipe.ts +++ b/frontend/lib/chat/prompts/pipe.ts @@ -8,12 +8,19 @@ export const pipePrompt = ( segmentId: string | null, reformulatedQuestion: string, tools: string[], -) => ` +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + const usePipeInstruction = pipe ? `- Use ${pipe} with different parameters if needed` : '' + + return ` You are a pipe tool specialist that creates an execution plan to answer: "${reformulatedQuestion}" # DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # AVAILABLE TOOLS @@ -67,7 +74,7 @@ Your response must include an "instructions" field with this structure: - Execute the pipes and examine what columns are returned, and which columns are needed to answer the question - Map the columns from pipe results to the final output structure using "type": "direct" - Add formula columns when calculations are needed (e.g., growth rates, percentages, differences) -- Use ${pipe} with different parameters if needed +${usePipeInstruction} - Use other available tools if they're more appropriate - Call multiple tools if needed to answer the question - Combine columns from multiple pipes if needed for comprehensive answers @@ -130,3 +137,4 @@ Always ensure variables in formulas match the dependency variable names. - Answer the reformulated question directly - Use the tools specified by the router - Be concise and accurate in your response` +} diff --git a/frontend/lib/chat/prompts/router.ts b/frontend/lib/chat/prompts/router.ts index 9116f7ddb..1b44b310a 100644 --- a/frontend/lib/chat/prompts/router.ts +++ b/frontend/lib/chat/prompts/router.ts @@ -2,8 +2,6 @@ // SPDX-License-Identifier: MIT /* eslint-disable max-len */ /* eslint-disable vue/max-len */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT export const routerPrompt = ( date: string, projectName: string, @@ -11,11 +9,18 @@ export const routerPrompt = ( parametersString: string, segmentId: string | null, toolsOverview: string, -) => `You are a routing agent that analyzes user questions and determines the appropriate next action. Your job is to evaluate questions and decide whether they can be answered with existing tools, need custom queries, or cannot be answered. +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + const pipeToolQuestion = pipe ? `- Can ${pipe} tool answer this with different parameters?` : '' + + return `You are a routing agent that analyzes user questions and determines the appropriate next action. Your job is to evaluate questions and decide whether they can be answered with existing tools, need custom queries, or cannot be answered. 
# DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # YOUR ROLE @@ -37,7 +42,7 @@ ${toolsOverview} **Step 1: Check Existing Tools (HIGHEST PRIORITY)** - **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → ALWAYS use activities_count or activities_cumulative_count pipes** -- Can ${pipe} tool answer this with different parameters? +${pipeToolQuestion} - IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. - For example, adding a country code parameter to a tool that doesn't support it is invalid. - Can other available tools answer this question? @@ -87,3 +92,4 @@ It must be something user-friendly. - If the action is "create_query", the reasoning must be something like "I'll create a query to answer the question." - If the action is "pipes", the reasoning must be something like "I'll use the widgets and to answer the question." ` +} diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index c48367dac..9cff0a130 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -7,14 +7,19 @@ export const textToSqlPrompt = ( parametersString: string, segmentId: string | null, reformulatedQuestion: string, -) => ` +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + return ` You are an expert SQL query generator that creates execution plans to answer: "${reformulatedQuestion}" Think step-by-step through the structured approach below. Be methodical and careful to ensure accuracy. 
# DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # YOUR TASK @@ -211,3 +216,4 @@ IMPORTANT REMINDERS: - Return the query with appropriate LIMIT in the instructions (not the test LIMIT 5) - Build your query completely and correctly BEFORE testing - Put MAXIMUM effort into getting it right the first time` +} diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index 5f155e1f6..feafb4389 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -4,6 +4,7 @@ import type { DataStreamWriter } from 'ai' import type { Pool } from 'pg' import { z } from 'zod' +import { RouterDecisionAction } from './enums' import type { ChatResponse } from '~~/server/repo/chat.repo' // ============================================ @@ -89,7 +90,11 @@ export type Instructions = z.infer // Router agent output schema export const routerOutputSchema = z.object({ - next_action: z.enum(['stop', 'create_query', 'pipes']), + next_action: z.enum([ + RouterDecisionAction.STOP, + RouterDecisionAction.CREATE_QUERY, + RouterDecisionAction.PIPES, + ]), reasoning: z.string().describe('Maximum 2 sentences explaining the decision'), reformulated_question: z.string().describe('Enhanced query with all parameters'), tools: z.array(z.string()).describe('Tools needed for next agent'), @@ -179,3 +184,15 @@ export interface TextToSqlAgentStreamInput { reformulatedQuestion: string dataStream: any } + +export interface AgentResponseCompleteParams { + userPrompt: string + responseData: ChatResponse + routerOutput: RouterOutput + pipeInstructions?: PipeInstructions + sqlQuery?: string + conversationId?: string + insightsDbPool: Pool + userEmail: string + dataStream: DataStreamWriter +} From 5f484c10b35aa2418c94532523d241505d200c74 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:11:57 +0300 Subject: [PATCH 09/21] feat: allow text_to_sql agent to execute queries for validation Signed-off-by: anil --- frontend/lib/chat/agents/text-to-sql.ts | 41 +++++++++++++ frontend/lib/chat/data-copilot.ts | 1 + frontend/lib/chat/prompts/text-to-sql.ts | 75 ++++++++++++++---------- 3 files changed, 85 insertions(+), 32 deletions(-) diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 9cce9b4a5..72d7162f4 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -78,4 +78,45 @@ export class TextToSqlAgent extends BaseAgent { protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { return true // Enable tool call monitoring for SQL agent } + + /** + * Override to add validation for text_to_sql tool calls + */ + protected override logToolCalls(response: any): void { + // Call parent method first to get normal logging + super.logToolCalls(response) + + // Add validation for text_to_sql tool calls + if (!response.steps || response.steps.length === 0) return + + for (const step of response.steps) { + if (step.toolCalls && step.toolCalls.length > 0) { + for (const call of step.toolCalls) { + if (call.toolName === 'text_to_sql') { + const question = call.args?.question || '' + + // Check if the question looks like SQL code (basic heuristic) + if (this.looksLikeSQL(question)) { + console.error(`❌ WARNING: text_to_sql tool called with SQL code instead of natural language question:`) + console.error(`Question: ${question}`) + 
console.error('text_to_sql tool should receive natural language questions, not SQL code') + // Don't throw error, just warn - allow the process to continue + } + } + } + } + } + } + + /** + * Basic heuristic to detect if a string looks like SQL code + */ + private looksLikeSQL(text: string): boolean { + const sqlKeywords = ['SELECT', 'FROM', 'WHERE', 'JOIN', 'GROUP BY', 'ORDER BY', 'LIMIT', 'WITH'] + const upperText = text.toUpperCase() + + // If it contains multiple SQL keywords, it's likely SQL code + const keywordCount = sqlKeywords.filter(keyword => upperText.includes(keyword)).length + return keywordCount >= 2 + } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 7fc36364a..523c6df58 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -189,6 +189,7 @@ export class DataCopilot { const followUpTools: Record = {} followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] followUpTools['list_datasources'] = this.tbTools['list_datasources'] + followUpTools['execute_query'] = this.tbTools['execute_query'] const agent = new TextToSqlAgent() return agent.execute({ diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index 9cff0a130..b9a49bd44 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -39,14 +39,18 @@ Your response must include an "instructions" field with a query string: - Study the schema carefully, noting column names and types - Identify which tables contain the data you need +**text_to_sql Tool:** +- Use with the natural language question to generate the SQL query +- Pass the user's question as the 'question' parameter +- Do NOT pass SQL code to this tool - it expects natural language questions only +- Build your understanding from schema first, then use text_to_sql +- You may need to refine your question and call this tool again based on validation results + **execute_query Tool:** -- This is for VALIDATION, not experimentation -- Build your query carefully and completely BEFORE testing +- Use for VALIDATION after generating SQL with text_to_sql - Add LIMIT 5 when validating to check the query works -- Use the tool to validate your query works -- If it fails or the results don't answer the question, fix the specific error and test again -- The returned SQL should be the EXACT tested query (with appropriate LIMIT, not the test LIMIT 5) -- Put extra effort to get it right the first time to avoid iterations +- If it fails, refine your natural language question and try text_to_sql again +- The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) Remember: Think through the ENTIRE query before testing. Minimize iterations. 
@@ -61,24 +65,22 @@ Follow this step-by-step process: - Identify which tables are relevant based on the query - Understand the available tables, columns, and relationships -**STEP 2: BUILD THE COMPLETE QUERY** -- Design a query that fulfills the user's request -- Apply ALL query enhancement rules -- Use Tinybird's ClickHouse SQL syntax -- ALWAYS filter by segmentId on activityRelations_deduplicated_cleaned_ds when applicable -- ALWAYS include timestamp filters when querying time-based data -- Ensure you're using the EXACT table and column names from the schemas -- Double-check all table/column names match the schema exactly -- Make sure the query is COMPLETE and CORRECT before proceeding +**STEP 2: GENERATE AND VALIDATE SQL QUERY** +- Use the text_to_sql tool with the reformulated question +- Pass the natural language question to the 'question' parameter +- Apply ALL query enhancement rules in your question formulation +- Ensure the question mentions segmentId filtering when applicable +- Ensure the question mentions timestamp filtering for time-based queries **STEP 3: VALIDATION** -- Use execute_query with your complete query (add LIMIT 5 for testing) -- If it succeeds: Return the query in the instructions with the appropriate LIMIT (not LIMIT 5) -- If it fails: Fix the specific error and test again -- Put maximum effort into getting it right the first time +- Use execute_query with the generated SQL (add LIMIT 5 for testing) +- If it succeeds: Proceed to return instructions with appropriate LIMIT +- If it fails: Refine your natural language question and try text_to_sql again +- Put maximum effort into getting it right with minimal iterations **STEP 4: RETURN INSTRUCTIONS** -- Create the instructions with your validated SQL query +- Create the instructions with the validated SQL query +- Use appropriate LIMIT for final query (not the test LIMIT 5) - Do not return the data, only the query plan - Provide a brief explanation of your query logic @@ -176,6 +178,12 @@ length(), empty(), notEmpty(), arrayElement(), has(), hasAll(), hasAny(), indexOf(), arrayCount(), arraySum(), arrayAvg(), arrayMin(), arrayMax(), arrayUniq(), arrayJoin(), arrayConcat(), arraySlice(), arraySort(), arrayReverse() +**Window Functions:** +row_number(), rank(), dense_rank(), percent_rank(), +lagInFrame(x[, offset[, default]]), leadInFrame(x[, offset[, default]]), +first_value(x), last_value(x), nth_value(x, offset) +Note: Use lagInFrame() instead of LAG(), leadInFrame() instead of LEAD() + **JSON Functions (if table has JSON columns):** JSONExtract(), JSONExtractString(), JSONExtractInt(), JSONExtractFloat(), JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() @@ -190,10 +198,11 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() # CRITICAL REMINDERS 1. **Tool Usage Discipline:** - - list_datasources: Use ONCE at the beginning - - execute_query: Use for validation, minimize iterations - - Think through the ENTIRE query before testing - - Put maximum effort into getting it right the first time + - list_datasources: Use ONCE at the beginning to understand schema + - text_to_sql: Use with natural language questions (may need refinement) + - execute_query: Use for validation with LIMIT 5 + - Do NOT call text_to_sql with SQL code - only natural language questions + - Put maximum effort into formulating questions correctly to minimize iterations 2. 
**Always Apply Filters:** - segmentId filter on activityRelations_deduplicated_cleaned_ds when applicable @@ -201,9 +210,10 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Use provided parameters as defaults 3. **Efficiency:** - - Build complete, correct queries before testing - - Minimize iterations - get it right the first time - - Use existing tools when possible + - Understand schema first, then generate and validate SQL + - Minimize iterations by crafting good natural language questions + - Use validation to ensure query works before returning instructions + - Use the text_to_sql tool properly with natural language questions only **RESPONSE GUIDELINES** - Create a clear SQL query in the instructions @@ -211,9 +221,10 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Provide a brief explanation of your query selection and how it answers the question IMPORTANT REMINDERS: -- Use list_datasources ONCE at the beginning -- Use execute_query for validation with LIMIT 5 (iterate if needed, but minimize iterations) -- Return the query with appropriate LIMIT in the instructions (not the test LIMIT 5) -- Build your query completely and correctly BEFORE testing -- Put MAXIMUM effort into getting it right the first time` +- Use list_datasources ONCE at the beginning to understand available tables +- Use text_to_sql with natural language questions (NOT SQL code) +- Use execute_query with LIMIT 5 to validate the generated SQL works +- Return the validated query in the instructions field (with appropriate LIMIT) +- Formulate questions to text_to_sql correctly to minimize validation iterations +- Put MAXIMUM effort into getting the question right with minimal iterations` } From f030dfb7213f4203d7f4f412f25c9f65f9204045 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:22:35 +0300 Subject: [PATCH 10/21] fix: less agressive sql detection Signed-off-by: anil --- frontend/lib/chat/agents/text-to-sql.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 72d7162f4..0b1b4288c 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -112,11 +112,16 @@ export class TextToSqlAgent extends BaseAgent { * Basic heuristic to detect if a string looks like SQL code */ private looksLikeSQL(text: string): boolean { - const sqlKeywords = ['SELECT', 'FROM', 'WHERE', 'JOIN', 'GROUP BY', 'ORDER BY', 'LIMIT', 'WITH'] - const upperText = text.toUpperCase() + // More specific SQL patterns that indicate actual SQL code, not natural language + const sqlPatterns = [ + /^\s*SELECT\s+/i, // Starts with SELECT + /\bFROM\s+\w+\s*$/i, // Ends with FROM table + /\bSELECT\s+.*\s+FROM\s+/i, // Contains SELECT ... 
FROM pattern + /\bWITH\s+\w+\s+AS\s*\(/i, // CTE pattern WITH name AS ( + /\bUNION\s+(ALL\s+)?SELECT/i, // UNION SELECT pattern + ] - // If it contains multiple SQL keywords, it's likely SQL code - const keywordCount = sqlKeywords.filter(keyword => upperText.includes(keyword)).length - return keywordCount >= 2 + // Check for actual SQL structure patterns + return sqlPatterns.some(pattern => pattern.test(text)) } } From 476fab9c071494dbf514cb1e924de43a8bcc4bb1 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:36:51 +0300 Subject: [PATCH 11/21] chore: text-to-sql logging Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 24 ++++++++++++++++++++++++ frontend/lib/chat/instructions.ts | 22 ++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 523c6df58..12b666e78 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -431,6 +431,12 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { + console.warn('🚀 handleCreateQueryAction called with:', { + reformulatedQuestion: reformulatedQuestion?.substring(0, 100) + '...', + projectName, + pipe, + }) + const textToSqlOutput = await this.runTextToSqlAgent({ messages, date, @@ -441,8 +447,24 @@ export class DataCopilot { reformulatedQuestion, }) + console.warn('🤖 TextToSqlAgent output:', { + hasExplanation: !!textToSqlOutput.explanation, + explanationLength: textToSqlOutput.explanation?.length || 0, + hasInstructions: !!textToSqlOutput.instructions, + instructionsType: typeof textToSqlOutput.instructions, + instructionsLength: textToSqlOutput.instructions?.length || 0, + instructionsPreview: textToSqlOutput.instructions?.substring(0, 150) + '...', + }) + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + console.warn('📊 Query execution result:', { + queryDataType: typeof queryData, + queryDataLength: queryData?.length || 0, + isArray: Array.isArray(queryData), + firstItemKeys: queryData?.[0] ? Object.keys(queryData[0]) : null, + }) + dataStream.writeData({ type: StreamDataType.SQL_RESULT, explanation: textToSqlOutput.explanation, @@ -450,6 +472,8 @@ export class DataCopilot { data: queryData, }) + console.warn('📤 Streamed data to client with type:', StreamDataType.SQL_RESULT) + return { sqlQuery: textToSqlOutput.instructions } } diff --git a/frontend/lib/chat/instructions.ts b/frontend/lib/chat/instructions.ts index 96e7e205b..ee6929444 100644 --- a/frontend/lib/chat/instructions.ts +++ b/frontend/lib/chat/instructions.ts @@ -208,6 +208,12 @@ export async function executePipeInstructions(instructions: PipeInstructions): P // Function to execute a SQL query via TinyBird's Query API export async function executeTextToSqlInstructions(query: TextToSqlInstructions): Promise { + console.warn('🔍 executeTextToSqlInstructions called with query:', { + queryType: typeof query, + queryLength: query?.length || 0, + queryPreview: query?.substring(0, 100) + (query?.length > 100 ? '...' 
: ''), + }) + const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN @@ -219,7 +225,13 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) // Execute the SQL query via TinyBird's Query API // TinyBird expects the query as URL-encoded form data const params = new URLSearchParams() - params.append('q', `${query} FORMAT JSON`) + const finalQuery = `${query} FORMAT JSON` + params.append('q', finalQuery) + + console.warn('📤 Executing TinyBird query:', { + url: `${tinybirdBaseUrl}/v0/sql`, + query: finalQuery.substring(0, 200) + (finalQuery.length > 200 ? '...' : ''), + }) const response = await ofetch(`${tinybirdBaseUrl}/v0/sql`, { method: 'POST', @@ -230,10 +242,16 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) body: params.toString(), }) + console.warn('📥 TinyBird response:', { + hasData: !!response.data, + dataLength: response.data?.length || 0, + responseKeys: Object.keys(response || {}), + }) + // TinyBird SQL API response format has data array return response.data || [] } catch (error: any) { - console.error('Error executing SQL query:', error) + console.error('❌ Error executing SQL query:', error) // Log more details about the error if (error.data) { console.error('Error response data:', error.data) From 34013b281d471c3ab367a0ed2a86dea06bbecd55 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:58:42 +0300 Subject: [PATCH 12/21] chore: blocking streaming logging Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 12b666e78..5a20f99d5 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -268,6 +268,13 @@ export class DataCopilot { const date = new Date().toISOString().slice(0, 10) return createDataStreamResponse({ + headers: { + 'Cache-Control': 'no-cache, no-store, must-revalidate', + 'Pragma': 'no-cache', + 'Expires': '0', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', // Disable Nginx buffering if present + }, execute: async (dataStream) => { const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', @@ -282,10 +289,12 @@ export class DataCopilot { } try { + console.warn('📤 Writing ANALYZING status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) + console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ messages, @@ -315,12 +324,14 @@ export class DataCopilot { return } + console.warn('📤 Writing COMPLETE status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, reasoning: routerOutput.reasoning, reformulatedQuestion: routerOutput.reformulated_question, }) + console.warn('✅ COMPLETE status written to stream') let sqlQuery: string | undefined = undefined let pipeInstructions: PipeInstructions | undefined = undefined @@ -366,11 +377,14 @@ export class DataCopilot { dataStream, }) } catch (error) { + console.error('❌ Error in streamingAgentRequestHandler:', error) + console.warn('📤 Writing ERROR status to stream at:', new Date().toISOString()) dataStream.writeData({ type: 'router-status', status: 'error', error: error instanceof Error ? 
error.message : 'An error occurred', }) + console.warn('✅ ERROR status written to stream') throw error } }, From c42efea70b2bd845b060690e942d7108128b88a1 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 19:11:02 +0300 Subject: [PATCH 13/21] chore: headers for disable buffering Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 7 ------- frontend/server/api/chat/stream.ts | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 5a20f99d5..697c32f08 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -268,13 +268,6 @@ export class DataCopilot { const date = new Date().toISOString().slice(0, 10) return createDataStreamResponse({ - headers: { - 'Cache-Control': 'no-cache, no-store, must-revalidate', - 'Pragma': 'no-cache', - 'Expires': '0', - 'Connection': 'keep-alive', - 'X-Accel-Buffering': 'no', // Disable Nginx buffering if present - }, execute: async (dataStream) => { const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 337e05988..490005a0e 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -17,6 +17,13 @@ interface IStreamRequestBody { } export default defineEventHandler(async (event): Promise => { + // Set streaming headers immediately + setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate') + setHeader(event, 'Pragma', 'no-cache') + setHeader(event, 'Expires', '0') + setHeader(event, 'Connection', 'keep-alive') + setHeader(event, 'X-Accel-Buffering', 'no') + try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) From 56cbed740a5ddf5f2d6ddff376bdf2d6f4f814c9 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 19:23:20 +0300 Subject: [PATCH 14/21] chore: moved createDataStreamResponse out of DataCopilot to test streaming Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 13 ++++--------- frontend/lib/chat/types.ts | 1 + frontend/server/api/chat/stream.ts | 24 +++++++++++++++--------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 697c32f08..f22932b6f 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -3,9 +3,7 @@ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, - createDataStreamResponse, type LanguageModelV1, - type DataStreamWriter, } from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' import type { Pool } from 'pg' @@ -263,13 +261,12 @@ export class DataCopilot { conversationId, insightsDbPool, userEmail, - }: DataCopilotQueryInput): Promise { + dataStream, + }: DataCopilotQueryInput): Promise { const parametersString = JSON.stringify(parameters || {}) const date = new Date().toISOString().slice(0, 10) - return createDataStreamResponse({ - execute: async (dataStream) => { - const responseData: ChatResponse = { + const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', inputTokens: 0, outputTokens: 0, @@ -380,8 +377,6 @@ export class DataCopilot { console.warn('✅ ERROR status written to stream') throw error } - }, - }) } /** @@ -391,7 +386,7 @@ export class DataCopilot { userPrompt: string, 
routerOutput: RouterOutput, responseData: ChatResponse, - dataStream: DataStreamWriter, + dataStream: any, insightsDbPool: Pool, userEmail: string, conversationId?: string, diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index feafb4389..24f41cea5 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -162,6 +162,7 @@ export interface DataCopilotQueryInput { conversationId?: string insightsDbPool: Pool userEmail: string + dataStream: DataStreamWriter // DataStreamWriter from AI SDK } export interface TextToSqlAgentInput { diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 490005a0e..10c419f19 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -1,6 +1,7 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT import type { Pool } from 'pg' +import { createDataStreamResponse } from 'ai' import { DataCopilot } from '~~/lib/chat/data-copilot' import { InsightsProjectsRepository } from '~~/server/repo/insightsProjects.repo' import { ChatMessage } from '~~/lib/chat/types' @@ -54,15 +55,20 @@ export default defineEventHandler(async (event): Promise => { const dataCopilot = new DataCopilot() await dataCopilot.initialize() - return dataCopilot.streamingAgentRequestHandler({ - messages, - segmentId: insightsProjects.segmentId, - projectName, - pipe, - parameters, - conversationId: finalConversationId, - insightsDbPool, - userEmail: event.context.user.email, + return createDataStreamResponse({ + execute: async (dataStream) => { + await dataCopilot.streamingAgentRequestHandler({ + messages, + segmentId: insightsProjects.segmentId, + projectName, + pipe, + parameters, + conversationId: finalConversationId, + insightsDbPool, + userEmail: event.context.user.email, + dataStream, // Pass the dataStream to the class + }) + } }) } catch (error) { return createError({ From 5207800de677f54ef48dabe0a3752a640d6cc10f Mon Sep 17 00:00:00 2001 From: anil Date: Sat, 13 Sep 2025 20:49:21 +0300 Subject: [PATCH 15/21] chore: test headers for cf streaming issues Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 6 ++++++ frontend/server/api/chat/stream.ts | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index f22932b6f..d5ab9edda 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,5 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT +/* eslint-disable @typescript-eslint/no-explicit-any */ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, @@ -284,6 +285,11 @@ export class DataCopilot { type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) + // Add padding for Cloudflare streaming threshold + dataStream.writeData({ + type: 'padding', + data: ' '.repeat(2048), + }) console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 10c419f19..22c812054 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -18,12 +18,13 @@ interface IStreamRequestBody { } export default defineEventHandler(async (event): Promise => { - // Set streaming headers immediately - setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate') + // Set streaming headers for Cloudflare compatibility + setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate, no-transform') setHeader(event, 'Pragma', 'no-cache') setHeader(event, 'Expires', '0') - setHeader(event, 'Connection', 'keep-alive') setHeader(event, 'X-Accel-Buffering', 'no') + setHeader(event, 'Content-Type', 'text/plain; charset=utf-8') + setHeader(event, 'Connection', 'close') try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = From 9196bc62fe8bbc78b493977240bade36a8e4ead7 Mon Sep 17 00:00:00 2001 From: anil Date: Sat, 13 Sep 2025 21:16:08 +0300 Subject: [PATCH 16/21] fix: enforcing text-to-sql response type, code cleaning Signed-off-by: anil --- frontend/lib/chat/agents/base-agent.ts | 18 +++++++++--- frontend/lib/chat/data-copilot.ts | 35 ------------------------ frontend/lib/chat/instructions.ts | 17 ------------ frontend/lib/chat/prompts/text-to-sql.ts | 17 ++++++++---- 4 files changed, 25 insertions(+), 62 deletions(-) diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 0e1be535c..2927f485f 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -157,33 +157,43 @@ export abstract class BaseAgent { * Extract and validate JSON from the response text */ protected getJson(text: string): TOutput { + // Debug logging to see what the agent actually returned + console.warn(`🔍 ${this.name} agent raw response:`, text.substring(0, 500) + (text.length > 500 ? '...' 
: '')) + // First, try simple JSON.parse since the text usually contains valid JSON let parsedOutput try { parsedOutput = JSON.parse(text) + console.warn(`✅ ${this.name} agent JSON.parse succeeded`) } catch { // Fall back to extractJSON if direct parsing fails try { parsedOutput = extractJSON(text) + console.warn(`✅ ${this.name} agent extractJSON succeeded`) } catch (error) { - console.error(`${this.name} agent failed to parse JSON:`, error) - console.error(`Response text:`, text) + console.error(`❌ ${this.name} agent failed to parse JSON:`, error) + console.error(`❌ Response text:`, text) throw new Error(`${this.name} agent did not return valid JSON`) } } if (!parsedOutput) { - console.error('No JSON found in the response') + console.error('❌ No JSON found in the response') console.error(text) throw new Error(`${this.name} agent did not return valid JSON`) } + // Debug logging for parsed output + console.warn(`🔍 ${this.name} agent parsed output:`, JSON.stringify(parsedOutput, null, 2)) + // Validate against schema try { const validatedOutput = this.outputSchema.parse(parsedOutput) + console.warn(`✅ ${this.name} agent schema validation succeeded`) return validatedOutput } catch (error) { - console.error(`Failed to validate ${this.name} JSON`, error) + console.error(`❌ Failed to validate ${this.name} JSON`, error) + console.error(`❌ Parsed output was:`, JSON.stringify(parsedOutput, null, 2)) throw new Error(`Failed to validate ${this.name} JSON: ${error}`) } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index d5ab9edda..ab380e3f9 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -280,17 +280,11 @@ export class DataCopilot { } try { - console.warn('📤 Writing ANALYZING status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) // Add padding for Cloudflare streaming threshold - dataStream.writeData({ - type: 'padding', - data: ' '.repeat(2048), - }) - console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ messages, @@ -320,14 +314,12 @@ export class DataCopilot { return } - console.warn('📤 Writing COMPLETE status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, reasoning: routerOutput.reasoning, reformulatedQuestion: routerOutput.reformulated_question, }) - console.warn('✅ COMPLETE status written to stream') let sqlQuery: string | undefined = undefined let pipeInstructions: PipeInstructions | undefined = undefined @@ -373,14 +365,11 @@ export class DataCopilot { dataStream, }) } catch (error) { - console.error('❌ Error in streamingAgentRequestHandler:', error) - console.warn('📤 Writing ERROR status to stream at:', new Date().toISOString()) dataStream.writeData({ type: 'router-status', status: 'error', error: error instanceof Error ? 
error.message : 'An error occurred', }) - console.warn('✅ ERROR status written to stream') throw error } } @@ -439,12 +428,6 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { - console.warn('🚀 handleCreateQueryAction called with:', { - reformulatedQuestion: reformulatedQuestion?.substring(0, 100) + '...', - projectName, - pipe, - }) - const textToSqlOutput = await this.runTextToSqlAgent({ messages, date, @@ -455,24 +438,8 @@ export class DataCopilot { reformulatedQuestion, }) - console.warn('🤖 TextToSqlAgent output:', { - hasExplanation: !!textToSqlOutput.explanation, - explanationLength: textToSqlOutput.explanation?.length || 0, - hasInstructions: !!textToSqlOutput.instructions, - instructionsType: typeof textToSqlOutput.instructions, - instructionsLength: textToSqlOutput.instructions?.length || 0, - instructionsPreview: textToSqlOutput.instructions?.substring(0, 150) + '...', - }) - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - console.warn('📊 Query execution result:', { - queryDataType: typeof queryData, - queryDataLength: queryData?.length || 0, - isArray: Array.isArray(queryData), - firstItemKeys: queryData?.[0] ? Object.keys(queryData[0]) : null, - }) - dataStream.writeData({ type: StreamDataType.SQL_RESULT, explanation: textToSqlOutput.explanation, @@ -480,8 +447,6 @@ export class DataCopilot { data: queryData, }) - console.warn('📤 Streamed data to client with type:', StreamDataType.SQL_RESULT) - return { sqlQuery: textToSqlOutput.instructions } } diff --git a/frontend/lib/chat/instructions.ts b/frontend/lib/chat/instructions.ts index ee6929444..428a9a37b 100644 --- a/frontend/lib/chat/instructions.ts +++ b/frontend/lib/chat/instructions.ts @@ -208,12 +208,6 @@ export async function executePipeInstructions(instructions: PipeInstructions): P // Function to execute a SQL query via TinyBird's Query API export async function executeTextToSqlInstructions(query: TextToSqlInstructions): Promise { - console.warn('🔍 executeTextToSqlInstructions called with query:', { - queryType: typeof query, - queryLength: query?.length || 0, - queryPreview: query?.substring(0, 100) + (query?.length > 100 ? '...' : ''), - }) - const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN @@ -228,11 +222,6 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) const finalQuery = `${query} FORMAT JSON` params.append('q', finalQuery) - console.warn('📤 Executing TinyBird query:', { - url: `${tinybirdBaseUrl}/v0/sql`, - query: finalQuery.substring(0, 200) + (finalQuery.length > 200 ? '...' 
: ''), - }) - const response = await ofetch(`${tinybirdBaseUrl}/v0/sql`, { method: 'POST', headers: { @@ -242,12 +231,6 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) body: params.toString(), }) - console.warn('📥 TinyBird response:', { - hasData: !!response.data, - dataLength: response.data?.length || 0, - responseKeys: Object.keys(response || {}), - }) - // TinyBird SQL API response format has data array return response.data || [] } catch (error: any) { diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index b9a49bd44..86b94c909 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -47,10 +47,11 @@ Your response must include an "instructions" field with a query string: - You may need to refine your question and call this tool again based on validation results **execute_query Tool:** -- Use for VALIDATION after generating SQL with text_to_sql +- Use ONLY for VALIDATION after generating SQL with text_to_sql - Add LIMIT 5 when validating to check the query works - If it fails, refine your natural language question and try text_to_sql again - The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) +- CRITICAL: Do NOT include query results in your final JSON response - only the query string Remember: Think through the ENTIRE query before testing. Minimize iterations. @@ -77,11 +78,13 @@ Follow this step-by-step process: - If it succeeds: Proceed to return instructions with appropriate LIMIT - If it fails: Refine your natural language question and try text_to_sql again - Put maximum effort into getting it right with minimal iterations +- IMPORTANT: Validation results are NOT part of your final response - only use for verification **STEP 4: RETURN INSTRUCTIONS** - Create the instructions with the validated SQL query - Use appropriate LIMIT for final query (not the test LIMIT 5) -- Do not return the data, only the query plan +- CRITICAL: Return ONLY the SQL query string in "instructions" field - NO query results/data +- Your JSON response must contain ONLY: explanation and instructions (SQL query string) - Provide a brief explanation of your query logic # QUERY ENHANCEMENT RULES @@ -215,10 +218,12 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Use validation to ensure query works before returning instructions - Use the text_to_sql tool properly with natural language questions only -**RESPONSE GUIDELINES** -- Create a clear SQL query in the instructions -- Do not return the data from the tools used, only the query plan -- Provide a brief explanation of your query selection and how it answers the question +**RESPONSE GUIDELINES - CRITICAL** +- Your JSON response must contain ONLY two fields: "explanation" and "instructions" +- "instructions" field must contain ONLY the SQL query string (no results, no data) +- "explanation" field must contain a brief explanation of your query selection +- NEVER include query results, validation data, or tool outputs in your JSON response +- The query will be executed separately - you only provide the query string IMPORTANT REMINDERS: - Use list_datasources ONCE at the beginning to understand available tables From d10ea8140bf1efd4927b2d9b8690cfb891b681e7 Mon Sep 17 00:00:00 2001 From: anil Date: Mon, 15 Sep 2025 03:23:47 +0300 Subject: [PATCH 17/21] feat: improved text-to-sql, keepalives for cf Signed-off-by: anil --- frontend/lib/chat/agents/base-agent.ts | 24 ++- 
frontend/lib/chat/agents/text-to-sql.ts | 178 +++++++++++++++------ frontend/lib/chat/data-copilot.ts | 84 +++++++--- frontend/lib/chat/prompts/text-to-sql.ts | 192 +++++++---------------- frontend/nuxt.config.ts | 12 +- 5 files changed, 261 insertions(+), 229 deletions(-) diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 2927f485f..1f362d573 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -160,20 +160,28 @@ export abstract class BaseAgent { // Debug logging to see what the agent actually returned console.warn(`🔍 ${this.name} agent raw response:`, text.substring(0, 500) + (text.length > 500 ? '...' : '')) - // First, try simple JSON.parse since the text usually contains valid JSON + // Try multiple parsing strategies for speed and reliability let parsedOutput try { + // Strategy 1: Direct JSON.parse (fastest) parsedOutput = JSON.parse(text) console.warn(`✅ ${this.name} agent JSON.parse succeeded`) } catch { - // Fall back to extractJSON if direct parsing fails try { - parsedOutput = extractJSON(text) - console.warn(`✅ ${this.name} agent extractJSON succeeded`) - } catch (error) { - console.error(`❌ ${this.name} agent failed to parse JSON:`, error) - console.error(`❌ Response text:`, text) - throw new Error(`${this.name} agent did not return valid JSON`) + // Strategy 2: Try parsing after trimming and cleaning + const cleanedText = text.trim().replace(/^```json\s*|\s*```$/g, '') + parsedOutput = JSON.parse(cleanedText) + console.warn(`✅ ${this.name} agent cleaned JSON.parse succeeded`) + } catch { + // Strategy 3: Fall back to extractJSON (slower but more robust) + try { + parsedOutput = extractJSON(text) + console.warn(`✅ ${this.name} agent extractJSON succeeded`) + } catch (error) { + console.error(`❌ ${this.name} agent failed to parse JSON:`, error) + console.error(`❌ Response text:`, text) + throw new Error(`${this.name} agent did not return valid JSON`) + } } } diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 0b1b4288c..bcb6de756 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -1,8 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import { z } from 'zod' import { textToSqlInstructionsSchema } from '../types' import { textToSqlPrompt } from '../prompts/text-to-sql' @@ -31,10 +29,134 @@ interface TextToSqlAgentInput { } export class TextToSqlAgent extends BaseAgent { + /** + * Generate SQL query using tools and text extraction + */ + override async execute( + input: TextToSqlAgentInput & { messages: any[] } + ): Promise { + try { + const { generateText } = await import('ai') + const systemPrompt = this.getSystemPrompt(input) + const tools = this.getTools(input) + const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input.messages) + + // Remove broken text_to_sql tool, keep working ones + const workingTools = { ...tools } + delete workingTools['text_to_sql'] + + const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + ` + +## CRITICAL INSTRUCTIONS +1. Use tools BRIEFLY to understand schema (max 2 tool calls) +2. Then STOP calling tools and write the SQL query +3. Put your SQL in a markdown code block: \`\`\`sql ... \`\`\` +4. 
You MUST conclude with a final SQL query - do not keep exploring!` + + const generateConfig: any = { + model: this.getModel(input), + system: fullSystemPrompt, + tools: workingTools, + maxSteps: this.maxSteps, + temperature: this.temperature, + } + + const providerOptions = this.getProviderOptions(input) + if (providerOptions) { + generateConfig.providerOptions = providerOptions + } + + generateConfig.messages = input.messages.filter( + (msg: any) => msg.content && msg.content.trim() !== '' && msg.role === 'user', + ).slice(-1) + + const response = await generateText(generateConfig) + + if (this.shouldMonitorToolCalls(input)) { + this.logToolCalls(response) + } + + // Extract SQL from text response + const result = this.extractSqlFromTextResponse(response) + console.warn("🔍 Extracted SQL:", result.instructions) + + return { + ...result, + usage: response.usage, + } + } catch (error) { + throw this.createError(error) + } + } + + + /** + * Extract SQL query from text response when tools fail + */ + private extractSqlFromTextResponse(response: any): SqlOutput { + const text = response.text || '' + + // Look for SQL code blocks + const sqlBlockMatch = text.match(/```sql\n([\s\S]*?)\n```/i) + if (sqlBlockMatch && sqlBlockMatch[1]) { + const sqlQuery = this.cleanSqlQuery(sqlBlockMatch[1].trim()) + return { + explanation: "Generated SQL query based on database schema analysis", + instructions: sqlQuery, + } + } + + // Look for WITH or SELECT statements in the text + const withMatch = text.match(/\b(WITH[\s\S]*?ORDER BY[^;]*;?)/i) + const selectMatch = text.match(/\b(SELECT[\s\S]*?ORDER BY[^;]*;?)/i) + + if (withMatch && withMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(withMatch[1].trim()), + } + } + + if (selectMatch && selectMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(selectMatch[1].trim()), + } + } + + // Fallback: look for any SQL-like content + const generalSqlMatch = text.match(/\b((?:WITH|SELECT)[\s\S]*?)(?=\n\n|\n(?![A-Z\s,()])|$)/i) + if (generalSqlMatch && generalSqlMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(generalSqlMatch[1].trim()), + } + } + + throw new Error('Could not extract SQL query from response text') + } + + /** + * Clean SQL query for Tinybird compatibility + */ + private cleanSqlQuery(sql: string): string { + // Remove SQL comments (both line and block comments) + sql = sql.replace(/--.*$/gm, '') // Remove line comments + sql = sql.replace(/\/\*[\s\S]*?\*\//g, '') // Remove block comments + + // Remove trailing semicolon (Tinybird doesn't allow it) + sql = sql.replace(/;\s*$/, '') + + // Clean up extra whitespace + sql = sql.replace(/\s+/g, ' ').trim() + + return sql + } + readonly name = 'SQL' readonly outputSchema = sqlOutputSchema readonly temperature = 0 - readonly maxSteps = 10 // Allow multiple steps for SQL generation and execution + readonly maxSteps = 3 protected getModel(input: TextToSqlAgentInput): any { return input.model @@ -70,58 +192,12 @@ export class TextToSqlAgent extends BaseAgent { protected override getProviderOptions(_input: TextToSqlAgentInput): any { return { bedrock: { - reasoningConfig: { type: 'enabled', budgetTokens: 3000 }, + reasoningConfig: { type: 'enabled', budgetTokens: 1500 }, // Reduced from 3000 for faster responses }, } } protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { - return true // 
Enable tool call monitoring for SQL agent - } - - /** - * Override to add validation for text_to_sql tool calls - */ - protected override logToolCalls(response: any): void { - // Call parent method first to get normal logging - super.logToolCalls(response) - - // Add validation for text_to_sql tool calls - if (!response.steps || response.steps.length === 0) return - - for (const step of response.steps) { - if (step.toolCalls && step.toolCalls.length > 0) { - for (const call of step.toolCalls) { - if (call.toolName === 'text_to_sql') { - const question = call.args?.question || '' - - // Check if the question looks like SQL code (basic heuristic) - if (this.looksLikeSQL(question)) { - console.error(`❌ WARNING: text_to_sql tool called with SQL code instead of natural language question:`) - console.error(`Question: ${question}`) - console.error('text_to_sql tool should receive natural language questions, not SQL code') - // Don't throw error, just warn - allow the process to continue - } - } - } - } - } - } - - /** - * Basic heuristic to detect if a string looks like SQL code - */ - private looksLikeSQL(text: string): boolean { - // More specific SQL patterns that indicate actual SQL code, not natural language - const sqlPatterns = [ - /^\s*SELECT\s+/i, // Starts with SELECT - /\bFROM\s+\w+\s*$/i, // Ends with FROM table - /\bSELECT\s+.*\s+FROM\s+/i, // Contains SELECT ... FROM pattern - /\bWITH\s+\w+\s+AS\s*\(/i, // CTE pattern WITH name AS ( - /\bUNION\s+(ALL\s+)?SELECT/i, // UNION SELECT pattern - ] - - // Check for actual SQL structure patterns - return sqlPatterns.some(pattern => pattern.test(text)) + return false // Enable tool call monitoring for SQL agent } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index ab380e3f9..5965ac465 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -185,10 +185,9 @@ export class DataCopilot { segmentId, reformulatedQuestion, }: TextToSqlAgentInput) { - const followUpTools: Record = {} - followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] - followUpTools['list_datasources'] = this.tbTools['list_datasources'] - followUpTools['execute_query'] = this.tbTools['execute_query'] + const followUpTools = this.tbTools + delete followUpTools['execute_query'] + const agent = new TextToSqlAgent() return agent.execute({ @@ -250,6 +249,29 @@ export class DataCopilot { }) } + /** + * Send keepalive message to prevent Cloudflare timeout + */ + private sendKeepalive(dataStream: any, message: string): void { + dataStream.writeData({ + type: 'keepalive', + message, + timestamp: new Date().toISOString(), + }) + } + + /** + * Send progress update message + */ + private sendProgress(dataStream: any, status: string, message: string): void { + dataStream.writeData({ + type: StreamDataType.ROUTER_STATUS, + status: 'progress', + message, + timestamp: new Date().toISOString(), + }) + } + /** * Main streaming handler that orchestrates the entire AI agent workflow */ @@ -428,26 +450,42 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { - const textToSqlOutput = await this.runTextToSqlAgent({ - messages, - date, - projectName, - pipe, - parametersString, - segmentId, - reformulatedQuestion, - }) - - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - - dataStream.writeData({ - type: StreamDataType.SQL_RESULT, - explanation: textToSqlOutput.explanation, - instructions: 
textToSqlOutput.instructions, - data: queryData, - }) + // Send progress update before starting TextToSql agent + this.sendProgress(dataStream, 'progress', 'Analyzing database schema...') + + // Set up keepalive interval during long operation + const keepaliveInterval = setInterval(() => { + this.sendKeepalive(dataStream, 'Processing SQL query generation...') + }, 15000) // Send keepalive every 15 seconds + + try { + const textToSqlOutput = await this.runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + + clearInterval(keepaliveInterval) + this.sendProgress(dataStream, 'progress', `SQL query generated! Executing [${textToSqlOutput.instructions}]...`) + + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.SQL_RESULT, + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData, + }) - return { sqlQuery: textToSqlOutput.instructions } + return { sqlQuery: textToSqlOutput.instructions } + } catch (error) { + clearInterval(keepaliveInterval) + throw error + } } /** diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index 86b94c909..058ac5d0f 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -8,107 +8,63 @@ export const textToSqlPrompt = ( segmentId: string | null, reformulatedQuestion: string, ) => { - const dashboardDescription = pipe - ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` - : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` - return ` -You are an expert SQL query generator that creates execution plans to answer: "${reformulatedQuestion}" - -Think step-by-step through the structured approach below. Be methodical and careful to ensure accuracy. - -# DATE AND CONTEXT -Today's date: ${date} -Current dashboard: ${dashboardDescription} -Segment ID: ${segmentId || 'not specified'} - -# YOUR TASK - -You must return instructions that describe the SQL query to execute. - -**INSTRUCTIONS STRUCTURE** -Your response must include an "instructions" field with a query string: -{ - "instructions": "SELECT ... FROM ... WHERE ..." // The complete SQL query to execute -} - -# CRITICAL TOOL USAGE RULES - -**list_datasources Tool:** -- Use ONCE at the beginning to understand available tables and schemas -- Study the schema carefully, noting column names and types -- Identify which tables contain the data you need - -**text_to_sql Tool:** -- Use with the natural language question to generate the SQL query -- Pass the user's question as the 'question' parameter -- Do NOT pass SQL code to this tool - it expects natural language questions only -- Build your understanding from schema first, then use text_to_sql -- You may need to refine your question and call this tool again based on validation results - -**execute_query Tool:** -- Use ONLY for VALIDATION after generating SQL with text_to_sql -- Add LIMIT 5 when validating to check the query works -- If it fails, refine your natural language question and try text_to_sql again -- The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) -- CRITICAL: Do NOT include query results in your final JSON response - only the query string - -Remember: Think through the ENTIRE query before testing. Minimize iterations. 
- -# YOUR TASK - STRUCTURED APPROACH - -Follow this step-by-step process: - -**STEP 1: UNDERSTAND THE QUESTION AND READ SCHEMAS** -- Analyze what the user is asking for -- Use list_datasources to see available tables and schemas -- Study the schema carefully, noting column names and types -- Identify which tables are relevant based on the query -- Understand the available tables, columns, and relationships - -**STEP 2: GENERATE AND VALIDATE SQL QUERY** -- Use the text_to_sql tool with the reformulated question -- Pass the natural language question to the 'question' parameter -- Apply ALL query enhancement rules in your question formulation -- Ensure the question mentions segmentId filtering when applicable -- Ensure the question mentions timestamp filtering for time-based queries - -**STEP 3: VALIDATION** -- Use execute_query with the generated SQL (add LIMIT 5 for testing) -- If it succeeds: Proceed to return instructions with appropriate LIMIT -- If it fails: Refine your natural language question and try text_to_sql again -- Put maximum effort into getting it right with minimal iterations -- IMPORTANT: Validation results are NOT part of your final response - only use for verification - -**STEP 4: RETURN INSTRUCTIONS** -- Create the instructions with the validated SQL query -- Use appropriate LIMIT for final query (not the test LIMIT 5) -- CRITICAL: Return ONLY the SQL query string in "instructions" field - NO query results/data -- Your JSON response must contain ONLY: explanation and instructions (SQL query string) -- Provide a brief explanation of your query logic - -# QUERY ENHANCEMENT RULES - -**CORE PRINCIPLES:** -- For non-timeseries data, cap results at 20 unless explicitly specified -- Choose the sorting metric that makes the most sense based on the user's question -- Never return just IDs - always include names or human-readable identifiers -- Stay as close as possible to the user's request -- Single value queries should return a single row and skip null or 0 values - -**TIMESERIES DATA RULES:** -- If no time range specified: use year-to-date (YTD) as the default range -- If user asks for "YTD" or "year-to-date": use the date range from January 1st of the current year to today -- If time range specified: use appropriate granularity -- Always sort chronologically (oldest to newest) -- For trends/evolution queries: likely want cumulative data - -**FOLLOW-UP REQUEST CONSISTENCY:** -- Maintain the same time granularity as previous queries unless explicitly changed -- Preserve context from earlier queries (e.g., filters, groupings) +You need to generate a SQL query to answer: "${reformulatedQuestion}" + +Context: +- Date: ${date} +- Project: ${projectName} +- Segment ID: ${segmentId || 'not specified'} + +Instructions: +1. BRIEFLY use list_datasources to understand available tables +2. Optionally use 1-2 other tools for quick data exploration +3. THEN IMMEDIATELY write a SQL query that answers the question +4. You have maximum 3 steps - use them wisely +5. Your final response MUST contain the SQL query in markdown code block + +CRITICAL: After understanding the schema, stop using tools and write the SQL query! + +## CRITICAL ANTI-JOIN RULE +**For anti-join patterns (finding rows in A that don't exist in B), use LEFT JOIN with empty string check.** + +**IMPORTANT**: Tinybird has two key limitations: +1. NOT EXISTS with correlated subqueries is NOT supported +2. 
LEFT JOIN fills unmatched columns with default values (like '') instead of NULL + +Example - CORRECT (use LEFT JOIN + empty string check): +\`\`\`sql +WITH table_a_data AS ( + SELECT id, name FROM table_a WHERE condition +), +table_b_ids AS ( + SELECT DISTINCT a_id FROM table_b WHERE condition +) +SELECT a.id, a.name +FROM table_a_data a +LEFT JOIN table_b_ids b ON b.a_id = a.id +WHERE b.a_id = '' +\`\`\` + +Example - AVOID (NOT EXISTS - will fail in Tinybird): +\`\`\`sql +WHERE NOT EXISTS (SELECT 1 FROM table_b b WHERE b.a_id = a.id) +\`\`\` + +Example - AVOID (IS NULL check - will fail because Tinybird uses default values): +\`\`\`sql +LEFT JOIN table_b b ON b.a_id = a.id WHERE b.a_id IS NULL +\`\`\` # TINYBIRD SQL COMPLETE REFERENCE +## TINYBIRD LIMITATIONS +- **NOT EXISTS with correlated subqueries is completely unsupported** +- **LEFT JOIN fills unmatched columns with default values (like '') instead of NULL** +- **Use LEFT JOIN + empty string check (= '') for anti-join patterns, not IS NULL** +- **No semicolons allowed** - Queries must not end with semicolon +- **Multi-statements not allowed** - One query per request + ## ALLOWED SQL STATEMENTS - **ONLY SELECT statements are supported** - All SELECT clauses are fully supported: FROM, JOIN, WHERE, GROUP BY, ORDER BY, LIMIT, OFFSET, HAVING, WITH @@ -191,45 +147,5 @@ Note: Use lagInFrame() instead of LAG(), leadInFrame() instead of LEAD() JSONExtract(), JSONExtractString(), JSONExtractInt(), JSONExtractFloat(), JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() -## CRITICAL CONSTRAINTS -1. **NO subqueries in FROM clause** - Use JOINs instead -2. **LIMIT is recommended** - Always include LIMIT unless you need all results -3. **Aggregations require GROUP BY** - Include all non-aggregate columns -4. **SETTINGS clause** - Goes at the very end: SETTINGS join_use_nulls = 1 -5. **Table references** - Use database.schema.table format when available - -# CRITICAL REMINDERS - -1. **Tool Usage Discipline:** - - list_datasources: Use ONCE at the beginning to understand schema - - text_to_sql: Use with natural language questions (may need refinement) - - execute_query: Use for validation with LIMIT 5 - - Do NOT call text_to_sql with SQL code - only natural language questions - - Put maximum effort into formulating questions correctly to minimize iterations - -2. **Always Apply Filters:** - - segmentId filter on activityRelations_deduplicated_cleaned_ds when applicable - - timestamp filters for time-based queries - - Use provided parameters as defaults - -3. 
**Efficiency:** - - Understand schema first, then generate and validate SQL - - Minimize iterations by crafting good natural language questions - - Use validation to ensure query works before returning instructions - - Use the text_to_sql tool properly with natural language questions only - -**RESPONSE GUIDELINES - CRITICAL** -- Your JSON response must contain ONLY two fields: "explanation" and "instructions" -- "instructions" field must contain ONLY the SQL query string (no results, no data) -- "explanation" field must contain a brief explanation of your query selection -- NEVER include query results, validation data, or tool outputs in your JSON response -- The query will be executed separately - you only provide the query string - -IMPORTANT REMINDERS: -- Use list_datasources ONCE at the beginning to understand available tables -- Use text_to_sql with natural language questions (NOT SQL code) -- Use execute_query with LIMIT 5 to validate the generated SQL works -- Return the validated query in the instructions field (with appropriate LIMIT) -- Formulate questions to text_to_sql correctly to minimize validation iterations -- Put MAXIMUM effort into getting the question right with minimal iterations` +Focus on understanding the data schema first, then writing an effective SQL query.` } diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index 6466b8d87..d4246bfbe 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -52,15 +52,9 @@ export default defineNuxtConfig({ insightsDbWriteHost: 'localhost', insightsDbReadHost: 'localhost', insightsDbPort: 5432, - insightsDbUsername: "postgres", - insightsDbPassword: "example", - insightsDbDatabase: "insights", - cmDbWriteHost: "localhost", - cmDbReadHost: "localhost", - cmDbPort: 5432, - cmDbUsername: "postgres", - cmDbPassword: "example", - cmDbDatabase: "crowd-web", + insightsDbUsername: 'postgres', + insightsDbPassword: 'example', + insightsDbDatabase: 'insights', // These are also exposed on the client-side public: { apiBase: '/api', From c9cc1b32cc4a6c2babcab017da1640a86e26a32d Mon Sep 17 00:00:00 2001 From: anil Date: Mon, 15 Sep 2025 03:40:49 +0300 Subject: [PATCH 18/21] chore: readd cm related keys to nuxt config Signed-off-by: anil --- frontend/nuxt.config.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index d4246bfbe..20f8c144b 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -55,6 +55,12 @@ export default defineNuxtConfig({ insightsDbUsername: 'postgres', insightsDbPassword: 'example', insightsDbDatabase: 'insights', + cmDbWriteHost: "localhost", + cmDbReadHost: "localhost", + cmDbPort: 5432, + cmDbUsername: "postgres", + cmDbPassword: "example", + cmDbDatabase: "crowd-web", // These are also exposed on the client-side public: { apiBase: '/api', From 70f32e8a214ccc9027b53fc79db90202813a658d Mon Sep 17 00:00:00 2001 From: Efren Lim Date: Thu, 18 Sep 2025 16:17:04 +0800 Subject: [PATCH 19/21] chore: remove required pipe check Signed-off-by: Efren Lim --- frontend/server/api/chat/stream.ts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 22c812054..048891d51 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -25,15 +25,11 @@ export default defineEventHandler(async (event): Promise => { setHeader(event, 'X-Accel-Buffering', 'no') setHeader(event, 'Content-Type', 'text/plain; charset=utf-8') 
setHeader(event, 'Connection', 'close') - + try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) - if (!pipe) { - return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) - } - if (!projectSlug) { return createError({ statusCode: 400, statusMessage: 'Project slug is required' }) } @@ -69,7 +65,7 @@ export default defineEventHandler(async (event): Promise => { userEmail: event.context.user.email, dataStream, // Pass the dataStream to the class }) - } + }, }) } catch (error) { return createError({ From 5f7fe0209c62f4336bf57f352cb17ec5aa39b301 Mon Sep 17 00:00:00 2001 From: anilb Date: Wed, 1 Oct 2025 15:13:14 +0200 Subject: [PATCH 20/21] fix: some router improvements and tests Signed-off-by: anilb --- frontend/lib/chat/prompts/router.ts | 19 +- frontend/lib/chat/tests/router.test.ts | 311 +++++++++++++++++++++++++ 2 files changed, 325 insertions(+), 5 deletions(-) create mode 100644 frontend/lib/chat/tests/router.test.ts diff --git a/frontend/lib/chat/prompts/router.ts b/frontend/lib/chat/prompts/router.ts index 1b44b310a..0c339535b 100644 --- a/frontend/lib/chat/prompts/router.ts +++ b/frontend/lib/chat/prompts/router.ts @@ -41,16 +41,24 @@ ${toolsOverview} **MANDATORY: Before checking data sources, you MUST first verify if existing pipes can handle the query** **Step 1: Check Existing Tools (HIGHEST PRIORITY)** -- **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → ALWAYS use activities_count or activities_cumulative_count pipes** +- **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → Consider activities_count or activities_cumulative_count pipes** ${pipeToolQuestion} -- IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. - - For example, adding a country code parameter to a tool that doesn't support it is invalid. +- **MANDATORY VALIDATION: Before routing to pipes, verify the pipe can FULLY answer the question:** + - **Check dimensions/groupings:** Does the query ask for breakdowns the pipe doesn't support? + * Example: "commits by company" → activities_count cannot group by company → USE create_query + * Example: "stars by country" → activities_count cannot group by country → USE create_query + - **Check parameters:** Does the pipe accept all required parameters? + * IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. + * For example, adding a country code parameter to a tool that doesn't support it is invalid. + - **Check drilldowns:** Does the query need custom aggregations or drilldowns the pipe doesn't provide? + - **If a pipe EXISTS but CANNOT answer the specific question → route to "create_query" instead** - Can other available tools answer this question? - Can a combination of tools provide the answer? - **Can the SAME tool be used multiple times with different parameters to create comparisons?** - Example: activity tools can be called once for forks, once for stars to compare them - **When user refers to activities by their types, you can use activities_count or activities_cumulative_count pipes** - Activity types include: stars, forks, commits, pull requests, issues, etc. 
+ - BUT only if the query doesn't require custom dimensions/groupings (see validation above) - **CRITICAL: For comparative questions (e.g., "this week vs last week", "forks vs stars", "current vs previous period"):** - Check if the same tool can be called multiple times with different parameters (time ranges, activity types, etc.) - Even if the question asks for a comparison, if the underlying data can be fetched using existing tools, choose "pipes" @@ -58,7 +66,8 @@ ${pipeToolQuestion} * "active contributors this week vs last week" → use contributor tool twice with different date ranges * "cumulative forks vs stars last month" → use activity tools twice with different activity types * "active contributors vs organization this year" → active_contributors AND active_organization pipes -- **If ANY existing tool can provide the data → ALWAYS choose "pipes" action** +- **If existing tools can FULLY provide the data (including all dimensions and groupings) → choose "pipes" action** +- **If tools exist but cannot provide required dimensions/groupings/drilldowns → choose "create_query" action** **Step 2: Check Data Sources (only if Step 1 is NO)** - Use list_datasources to examine available tables and fields @@ -70,7 +79,7 @@ ${pipeToolQuestion} # ROUTING DECISIONS - "stop": The question cannot be answered with available data -- "create_query": Custom SQL query needed using available data sources +- "create_query": Custom SQL query needed using available data sources (tools can be empty) - "pipes": Existing tools can answer the question (specify which tools in the tools array) # IMPORTANT diff --git a/frontend/lib/chat/tests/router.test.ts b/frontend/lib/chat/tests/router.test.ts new file mode 100644 index 000000000..68645b4b8 --- /dev/null +++ b/frontend/lib/chat/tests/router.test.ts @@ -0,0 +1,311 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. 
+// SPDX-License-Identifier: MIT +/* eslint-disable @typescript-eslint/no-explicit-any */ +/** + * Router Agent Tests with Real AI Model Execution + * + * Tests router agent with actual Bedrock model and real Tinybird MCP tools + */ + +import { describe, test, expect, beforeAll, afterAll } from 'vitest' +import { createAmazonBedrock, type AmazonBedrockProvider } from '@ai-sdk/amazon-bedrock' +import { experimental_createMCPClient as createMCPClient, type LanguageModelV1 } from 'ai' +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' + +import { RouterAgent } from '../agents/router' +import { RouterDecisionAction } from '../enums' +import type { ChatMessage, RouterAgentInput } from '../types' + +let bedrock: AmazonBedrockProvider | null = null + +describe('Router Agent', () => { + let model: LanguageModelV1 + let mcpClient: any + let tbTools: Record = {} + let toolsOverview: string = '' + + beforeAll(async () => { + // Check if we have the required environment variables + const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN + const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL + const hasAwsCredentials = process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID + + if (!tinybirdToken || !tinybirdBaseUrl || !hasAwsCredentials) { + console.warn('⚠️ Skipping real integration tests - missing credentials') + console.warn( + 'Required: NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN, NUXT_TINYBIRD_BASE_URL, AWS Bedrock credentials', + ) + return + } + + // Initialize AWS Bedrock model exactly like DataCopilot + bedrock = createAmazonBedrock({ + accessKeyId: process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID, + secretAccessKey: process.env.NUXT_AWS_BEDROCK_SECRET_ACCESS_KEY, + region: process.env.NUXT_AWS_BEDROCK_REGION, + }) + + // Initialize model once, like DataCopilot does in constructor + const BEDROCK_MODEL_ID = 'us.anthropic.claude-sonnet-4-20250514-v1:0' + model = bedrock(BEDROCK_MODEL_ID) + + // Initialize MCP client to get real tools - same as DataCopilot + const tbMcpUrl = `https://mcp.tinybird.co?token=${tinybirdToken}&host=${tinybirdBaseUrl}` + const url = new URL(tbMcpUrl) + + try { + mcpClient = await createMCPClient({ + transport: new StreamableHTTPClientTransport(url, { + sessionId: `test_session_${Date.now()}`, + }), + }) + + // Load real tools from Tinybird MCP + tbTools = await mcpClient.tools({}) + buildToolsOverview() + + console.warn(`✅ Connected to Tinybird MCP - ${Object.keys(tbTools).length} tools loaded`) + } catch (error) { + console.error('❌ Failed to connect to Tinybird MCP:', error) + throw error + } + }, 30000) + + afterAll(async () => { + if (mcpClient) { + try { + await mcpClient.close?.() + } catch (error) { + console.warn('Warning: Could not close MCP client:', error) + } + } + }) + + // Build tools overview exactly like DataCopilot does + function buildToolsOverview(): void { + const excludedFromOverview = new Set([ + 'explore_data', + 'execute_query', + 'text_to_sql', + 'list_endpoints', + 'list_service_datasources', + ]) + + const toolDescriptions: string[] = [] + for (const [toolName, tool] of Object.entries(tbTools)) { + if (excludedFromOverview.has(toolName)) continue + + const description = tool.description || tool.meta?.description || 'No description available' + toolDescriptions.push(`- ${toolName}: ${description}`) + } + + toolsOverview = toolDescriptions.join('\n') + } + + function createTestInput(userQuery: string): RouterAgentInput { + const messages: ChatMessage[] = [{ role: 'user', content: 
userQuery }] + + console.warn("📝 Creating test input for query:", userQuery) + + return { + model, + messages, + tools: tbTools, + toolsOverview, + date: new Date().toISOString().slice(0, 10), + projectName: 'test-project', + pipe: 'test-pipe', + parametersString: '{}', + segmentId: 'test-segment', + } + } + + function skipIfNoCredentials() { + const hasCredentials = + process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN && + process.env.NUXT_TINYBIRD_BASE_URL && + process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID + + if (!hasCredentials) { + console.warn('Skipping test - missing credentials') + return true + } + return false + } + + describe('Basic functionality', () => { + test('should create original router agent successfully', () => { + const router = new RouterAgent() + expect(router).toBeDefined() + expect(router.name).toBe('Router') + expect(router.temperature).toBe(0) + }) + + + test('should validate output schema for both implementations', () => { + const originalRouter = new RouterAgent() + + const validOutput = { + next_action: RouterDecisionAction.PIPES, + reasoning: 'Test reasoning', + reformulated_question: 'Test question', + tools: ['activities_count'], + } + + // Both should use the same schema + expect(originalRouter.outputSchema.safeParse(validOutput).success).toBe(true) + }) + + test('should reject invalid output for both implementations', () => { + const originalRouter = new RouterAgent() + + const invalidOutput = { + next_action: 'INVALID_ACTION', + reasoning: 'Test reasoning', + } + + // Both should reject invalid output + expect(originalRouter.outputSchema.safeParse(invalidOutput).success).toBe(false) + }) + + test('should connect to MCP and load tools', () => { + if (skipIfNoCredentials()) return + + expect(Object.keys(tbTools).length).toBeGreaterThan(0) + expect(tbTools.list_datasources).toBeDefined() + expect(toolsOverview).toContain('activities') + }) + }) + + describe('Real AI routing decisions', () => { + describe('PIPES routing', () => { + test('should route activity queries correctly to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me commits this week') + + console.warn('🤖 Sending query to router agent: Show me commits this week') + const result = await router.execute(input) + console.warn('🤖 Router agent response:', result) + + expect(result.next_action).toBeDefined() + expect(Object.values(RouterDecisionAction)).toContain(result.next_action) + expect(RouterDecisionAction.PIPES).toBe(result.next_action) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + expect(result.usage.totalTokens).toBeGreaterThan(0) + + console.warn(`🔍 Activity query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route stars query for previous week to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me stars for the previous week') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Stars query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route forks query for last year to PIPE', 
async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me forks for the last year') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Forks query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route companies contributing query to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('List of companies contributing in project') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Companies query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route geographical queries correctly', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me contributors from Brazil') + + const result = await router.execute(input) + + expect(result.next_action).toBeDefined() + expect(Object.values(RouterDecisionAction)).toContain(result.next_action) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toContain('Brazil') + + console.warn(`🔍 Geographic query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + describe('CREATE_QUERY (TEXT_TO_SQL) routing', () => { + test('should route commit activity by company query to CREATE_QUERY', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me commit activity by company over all time period') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.CREATE_QUERY) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + + console.warn(`🔍 Commit activity by company query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + describe('STOP routing', () => { + test('should route impossible queries to STOP', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput("What's the weather forecast for contributors?") + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.STOP) + expect(result.reasoning).toContain('weather') + expect(result.tools).toEqual([]) + + console.warn(`🔍 Weather query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + }) + +}) From 5d113ec45d4422382822c6854190272f7ee171b0 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 9 Oct 2025 10:04:30 +0200 Subject: [PATCH 21/21] feat: data copilot improvements (#726) Signed-off-by: anilb --- ..._addAskClarificationRouterResponseType.sql | 9 + ...411__createChatResponseAgentStepsTable.sql | 27 + ...V1759927412__makeChatResponsesNullable.sql | 18 + .../components/chat-history/chat-result.vue | 33 - .../components/results/results-section.vue | 13 +- .../copilot/store/copilot.api.service.ts | 48 +- 
.../shared/modules/copilot/store/test.json | 60 - .../shared/modules/copilot/store/test2.json | 378 ------ .../shared/modules/copilot/store/test3.json | 264 ----- .../modules/copilot/types/copilot.types.ts | 9 +- frontend/lib/chat/agents/auditor.ts | 43 + frontend/lib/chat/agents/base-agent.ts | 4 +- frontend/lib/chat/agents/index.ts | 3 +- frontend/lib/chat/agents/router.ts | 8 +- frontend/lib/chat/agents/text-to-sql.ts | 34 +- frontend/lib/chat/data-copilot.ts | 1009 ++++++++++++++--- frontend/lib/chat/enums.ts | 10 + frontend/lib/chat/prompts/auditor.ts | 159 +++ frontend/lib/chat/prompts/pipe.ts | 13 +- frontend/lib/chat/prompts/router.ts | 149 ++- frontend/lib/chat/prompts/text-to-sql.ts | 258 +++-- .../lib/chat/prompts/tinybird-functions.md | 78 ++ .../lib/chat/prompts/tinybird-patterns.md | 135 +++ frontend/lib/chat/tests/auditor.test.ts | 187 +++ frontend/lib/chat/tests/router.test.ts | 215 ++-- frontend/lib/chat/types.ts | 41 +- frontend/lib/chat/utils/data-summary.ts | 135 +++ frontend/nuxt.config.ts | 1 + frontend/server/api/chat/stream.ts | 13 +- frontend/server/middleware/database.ts | 13 +- frontend/server/repo/chat.repo.ts | 148 ++- 31 files changed, 2306 insertions(+), 1209 deletions(-) create mode 100644 database/migrations/V1759392166__addAskClarificationRouterResponseType.sql create mode 100644 database/migrations/V1759927411__createChatResponseAgentStepsTable.sql create mode 100644 database/migrations/V1759927412__makeChatResponsesNullable.sql delete mode 100644 frontend/app/components/shared/modules/copilot/store/test.json delete mode 100644 frontend/app/components/shared/modules/copilot/store/test2.json delete mode 100644 frontend/app/components/shared/modules/copilot/store/test3.json create mode 100644 frontend/lib/chat/agents/auditor.ts create mode 100644 frontend/lib/chat/prompts/auditor.ts create mode 100644 frontend/lib/chat/prompts/tinybird-functions.md create mode 100644 frontend/lib/chat/prompts/tinybird-patterns.md create mode 100644 frontend/lib/chat/tests/auditor.test.ts create mode 100644 frontend/lib/chat/utils/data-summary.ts diff --git a/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql b/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql new file mode 100644 index 000000000..84fb0e825 --- /dev/null +++ b/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql @@ -0,0 +1,9 @@ +-- Drop the existing check constraint +ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; + +-- Add the new check constraint with 'ask_clarification' +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IN ('pipes', 'create_query', 'stop', 'ask_clarification')); + +-- Add clarification_question column to store the clarification question +ALTER TABLE chat_responses ADD COLUMN IF NOT EXISTS clarification_question TEXT; diff --git a/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql b/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql new file mode 100644 index 000000000..e1cc71511 --- /dev/null +++ b/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql @@ -0,0 +1,27 @@ +-- Create enum for agent types +CREATE TYPE agent_type AS ENUM ('ROUTER', 'PIPE', 'TEXT_TO_SQL', 'AUDITOR', 'CHART', 'EXECUTE_INSTRUCTIONS'); + +-- Create table to track individual agent execution steps +CREATE TABLE IF NOT EXISTS chat_response_agent_steps ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + 
chat_response_id UUID NOT NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + model TEXT, + agent agent_type NOT NULL, + response JSONB, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + response_time_seconds NUMERIC NOT NULL DEFAULT 0, + instructions TEXT, + error_message TEXT, + + CONSTRAINT fk_chat_response + FOREIGN KEY (chat_response_id) + REFERENCES chat_responses(id) + ON DELETE CASCADE +); + +-- Create indexes for efficient querying +CREATE INDEX idx_agent_steps_chat_response_id ON chat_response_agent_steps(chat_response_id); +CREATE INDEX idx_agent_steps_created_at ON chat_response_agent_steps(created_at DESC); +CREATE INDEX idx_agent_steps_agent_type ON chat_response_agent_steps(agent); diff --git a/database/migrations/V1759927412__makeChatResponsesNullable.sql b/database/migrations/V1759927412__makeChatResponsesNullable.sql new file mode 100644 index 000000000..d94efe725 --- /dev/null +++ b/database/migrations/V1759927412__makeChatResponsesNullable.sql @@ -0,0 +1,18 @@ +-- Make router fields nullable to allow early creation of chat_responses +ALTER TABLE chat_responses ALTER COLUMN router_response DROP NOT NULL; +ALTER TABLE chat_responses ALTER COLUMN router_reason DROP NOT NULL; + +-- Drop existing constraints +ALTER TABLE chat_responses DROP CONSTRAINT IF EXISTS chat_responses_router_response_check; +ALTER TABLE chat_responses DROP CONSTRAINT IF EXISTS check_pipe_instructions; + +-- Add new constraint that allows NULL or valid enum values +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IS NULL OR router_response IN ('pipes', 'create_query', 'stop', 'ask_clarification')); + +-- Recreate pipe_instructions check with NULL handling +ALTER TABLE chat_responses ADD CONSTRAINT check_pipe_instructions CHECK ( + router_response IS NULL OR + (router_response = 'pipes' AND pipe_instructions IS NOT NULL) OR + (router_response != 'pipes' AND pipe_instructions IS NULL) +); diff --git a/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue b/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue index 495888bd6..b6c03b183 100644 --- a/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue +++ b/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue @@ -24,33 +24,11 @@ SPDX-License-Identifier: MIT > {{ reasoning }} -
{{ message.content }}