From 29e26d383efcecf8ce41b105f06e89f06b60fa9d Mon Sep 17 00:00:00 2001 From: anilb Date: Fri, 5 Sep 2025 10:39:24 +0200 Subject: [PATCH 01/21] feat: copilot - keeping track of conversation_id Signed-off-by: anilb --- .gitignore | 4 +- ...3486__addConversationIdToChatResponses.sql | 8 ++ .../copilot/components/copilot-sidebar.vue | 36 ++++++--- .../copilot/store/copilot.api.service.ts | 32 ++++++-- .../modules/copilot/types/copilot.types.ts | 3 +- frontend/lib/chat/agents/base-agent.ts | 1 - frontend/lib/chat/data-copilot.ts | 78 +++++++++---------- frontend/server/api/chat/stream.ts | 7 +- frontend/server/repo/chat.repo.ts | 10 ++- 9 files changed, 117 insertions(+), 62 deletions(-) create mode 100644 database/migrations/V1756973486__addConversationIdToChatResponses.sql diff --git a/.gitignore b/.gitignore index 0c25f88c9..bb828f401 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ scripts/scaffold.yaml node_modules .prettierrc **/.env* -!.env.dist \ No newline at end of file +!.env.dist +database/Dockerfile.flyway +database/flyway_migrate.sh \ No newline at end of file diff --git a/database/migrations/V1756973486__addConversationIdToChatResponses.sql b/database/migrations/V1756973486__addConversationIdToChatResponses.sql new file mode 100644 index 000000000..df938665b --- /dev/null +++ b/database/migrations/V1756973486__addConversationIdToChatResponses.sql @@ -0,0 +1,8 @@ +ALTER TABLE chat_responses +ADD COLUMN conversation_id UUID DEFAULT gen_random_uuid(); + +-- Create index for efficient conversation queries +CREATE INDEX idx_chat_responses_conversation_id ON chat_responses(conversation_id); + +-- Create index for efficient conversation + timestamp queries +CREATE INDEX idx_chat_responses_conversation_created_at ON chat_responses(conversation_id, created_at); \ No newline at end of file diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue index 850c2627b..8ce9fbc2e 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue @@ -139,6 +139,7 @@ const input = ref('') const streamingStatus = ref('') const error = ref('') const messages = ref>([]) // tempData as AIMessage +const conversationId = ref(undefined) const isLoading = computed({ get: () => props.isLoading, @@ -176,15 +177,17 @@ const callChatApi = async (userMessage: string) => { copilotDefaults.value.project, copilotDefaults.value.widget, token.value, - copilotDefaults.value.params) + copilotDefaults.value.params, + conversationId.value) // Handle the streaming response - await copilotApiService.handleStreamingResponse(response, messages.value, (status) => { - streamingStatus.value = status; - }, (message, index) => { - if (index === -1) { - messages.value.push(message); - } else { + const returnedConversationId = await copilotApiService.handleStreamingResponse( + response, messages.value, (status) => { + streamingStatus.value = status; + }, (message, index) => { + if (index === -1) { + messages.value.push(message); + } else { messages.value[index] = message; } @@ -200,10 +203,19 @@ const callChatApi = async (userMessage: string) => { selectedResultId.value = message.id; } scrollToEnd(); - }, () => { + }, (receivedConversationId) => { isLoading.value = false; streamingStatus.value = ''; + // Store the conversationId for subsequent calls + if (receivedConversationId) { + conversationId.value 
= receivedConversationId; + } }); + + // Also capture conversationId from the return value as backup + if (returnedConversationId && !conversationId.value) { + conversationId.value = returnedConversationId; + } } } catch (err) { console.error('Failed to send message:', err) @@ -233,7 +245,13 @@ const selectResult = (id: string) => { selectedResultId.value = id; } -watch(copilotDefaults, (newDefaults) => { +watch(copilotDefaults, (newDefaults, oldDefaults) => { + // Clear conversation when widget changes + if (oldDefaults && newDefaults.widget !== oldDefaults.widget) { + conversationId.value = undefined; + messages.value = []; + } + if (newDefaults.question) { callChatApi(newDefaults.question); } diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index 43c096d9a..d3a4a91eb 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -11,7 +11,7 @@ import type { Project } from '~~/types/project' export const tempData = testData3 as AIMessage[]; class CopilotApiService { // Generate unique ID for messages - generateId = () => Date.now().toString(36) + Math.random().toString(36).substr(2); + generateId = () => Date.now().toString(36) + Math.random().toString(36).substring(2); generateTextMessage = (message: string, role: MessageRole, status: MessageStatus, type: MessagePartType = 'text') => { const userMessageId = this.generateId(); @@ -31,7 +31,8 @@ class CopilotApiService { project: Project, pipe: string, token: string, - parameters?: CopilotParams): Promise { + parameters?: CopilotParams, + conversationId?: string): Promise { // Prepare the request body with the correct format const requestBody = { messages: messages.map(m => ({ @@ -41,7 +42,8 @@ class CopilotApiService { pipe, segmentId: project?.id, projectName: project?.name, - parameters + parameters, + conversationId } // Send streaming request const response = await fetch('/api/chat/stream', { @@ -121,8 +123,8 @@ class CopilotApiService { messages: Array, statusCallBack: (status: string) => void, messageCallBack: (message: AIMessage, index: number) => void, - completionCallBack: () => void - ) { + completionCallBack: (conversationId?: string) => void + ): Promise { const reader = response.body?.getReader() const decoder = new TextDecoder() @@ -132,6 +134,7 @@ class CopilotApiService { let assistantContent = '' let assistantMessageId: string | null = null + let conversationId: string | undefined = undefined let lineBuffer = '' // Buffer to accumulate partial lines try { @@ -176,13 +179,18 @@ class CopilotApiService { if (result) { assistantContent = result.assistantContent assistantMessageId = result.assistantMessageId + if (result.conversationId) { + conversationId = result.conversationId + } } } } } finally { reader.releaseLock() - completionCallBack(); + completionCallBack(conversationId); } + + return conversationId } private processCompleteLine( @@ -192,7 +200,7 @@ class CopilotApiService { messages: Array, statusCallBack: (status: string) => void, messageCallBack: (message: AIMessage, index: number) => void - ): { assistantMessageId: string | null; assistantContent: string } | null { + ): { assistantMessageId: string | null; assistantContent: string; conversationId?: string } | null { try { // Parse AI SDK data stream format: "prefix:data" const colonIndex = line.indexOf(':') @@ -206,6 +214,8 @@ class 
CopilotApiService { // Handle different stream prefixes if (prefix === '2') { assistantMessageId = null; + let capturedConversationId: string | undefined = undefined; + // Custom data events from your backend (like router-status) const dataArray = JSON.parse(dataString) for (const data of dataArray) { @@ -236,6 +246,11 @@ class CopilotApiService { statusCallBack('Tool execution completed'); } + // Capture conversationId from chat-response-id for return + if (data.type === 'chat-response-id' && data.conversationId) { + capturedConversationId = data.conversationId; + } + const content = data.type === 'chat-response-id' ? data.id : data.explanation // Create assistant message if it doesn't exist yet @@ -253,10 +268,13 @@ class CopilotApiService { content, explanation: data.explanation, instructions: data.instructions, + conversationId: data.conversationId, timestamp: Date.now() }, -1); } } + + return { assistantMessageId, assistantContent, conversationId: capturedConversationId } } else if (prefix === '0') { // Text delta from streamText (streaming text content) const textDelta = JSON.parse(dataString) diff --git a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts index 7cc742a36..3171c5995 100644 --- a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts +++ b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts @@ -15,7 +15,7 @@ export interface CopilotData { export type CopilotParams = Record; -export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id'; +export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id' | 'conversation-id'; export type MessageStatus = 'analyzing' | 'complete' | 'error'; export type MessageData = Record; export type MessageRole = 'user' | 'assistant'; @@ -38,6 +38,7 @@ export interface AIMessage { status: MessageStatus; content: string; timestamp: number; + conversationId?: string; sql?: string; data?: Array; explanation?: string; diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 08bc259c3..e5830efe4 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -104,7 +104,6 @@ export abstract class BaseAgent { const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + jsonInstructions // Check if we have messages in the input - const hasMessages = typeof input === 'object' && input !== null && diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 94355bee9..9ee2b3f93 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -9,11 +9,8 @@ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/ import type { ChatResponse } from '../../server/repo/chat.repo' import { runRouterAgent } from './agents/router' -// TODO: Uncomment once we support text-to-sql -// import { runTextToSqlAgent } from './agents/text-to-sql'; +import { runTextToSqlAgent } from './agents/text-to-sql'; import { runPipeAgent } from './agents/pipe' -// TODO: Uncomment once we support text-to-sql -// eslint-disable-next-line @typescript-eslint/no-unused-vars import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' import type { ChatMessage } from './types' @@ -29,6 +26,7 @@ export async function streamingAgentRequestHandler({ projectName, pipe, parameters, + 
conversationId, onResponseComplete, }: { messages: ChatMessage[] @@ -36,6 +34,7 @@ export async function streamingAgentRequestHandler({ projectName?: string pipe: string parameters?: Record + conversationId?: string onResponseComplete?: (response: ChatResponse) => Promise }): Promise { const url = new URL( @@ -132,12 +131,14 @@ export async function streamingAgentRequestHandler({ pipeInstructions: undefined, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } return @@ -168,12 +169,14 @@ export async function streamingAgentRequestHandler({ pipeInstructions: undefined, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } return @@ -187,44 +190,38 @@ export async function streamingAgentRequestHandler({ }) const followUpTools: Record = {} - // TODO: Uncomment once we support text-to-sql - // if (routerOutput.next_action === "create_query") { - followUpTools['list_datasources'] = tbTools['list_datasources'] - // TODO: Uncomment once we support text-to-sql - // } - // else { - for (const toolName of routerOutput.tools) { + if (routerOutput.next_action === "create_query") { + followUpTools['text_to_sql'] = tbTools['text_to_sql'] + } else { + for (const toolName of routerOutput.tools) { if (tbTools[toolName]) { followUpTools[toolName] = tbTools[toolName] } } - // TODO: Uncomment once we support text-to-sql - // } - // if (routerOutput.next_action === "create_query") { - // const textToSqlOutput = await runTextToSqlAgent({ - // model, - // messages, - // tools: followUpTools, - // date: dateString as string, - // projectName: projectName as string, - // pipe, - // parametersString, - // segmentId: segmentId as string, - // reformulatedQuestion: routerOutput.reformulated_question, - // }); - - // // Execute the SQL query according to the instructions - // const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions); + if (routerOutput.next_action === "create_query") { + const textToSqlOutput = await runTextToSqlAgent({ + model, + messages, + tools: followUpTools, + date: dateString as string, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + }) + // Execute the SQL query according to the instructions + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: "sql-result", + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData + }) - // dataStream.writeData({ - // type: "sql-result", - // explanation: textToSqlOutput.explanation, - // instructions: textToSqlOutput.instructions, - // data: queryData - // }); - // } else - if (routerOutput.next_action === 'pipes') { + } else if (routerOutput.next_action === 'pipes') { const pipeOutput = await runPipeAgent({ model, messages, @@ -274,15 +271,18 @@ export async function streamingAgentRequestHandler({ pipeInstructions: pipeOutput.instructions, sqlQuery: undefined, model: MODEL, + conversationId: conversationId, }) - // Stream the chat response ID + // Stream the chat response ID 
and conversation ID dataStream.writeData({ type: 'chat-response-id', id: chatResponseId, + conversationId: conversationId || '', }) } } + } } catch (error) { dataStream.writeData({ type: 'router-status', diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 111c58549..c49659b66 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -13,17 +13,21 @@ interface IStreamRequestBody { projectName?: string pipe: string parameters?: Record + conversationId?: string } export default defineEventHandler(async (event): Promise => { try { - const { messages, segmentId, projectName, pipe, parameters } = + const { messages, segmentId, projectName, pipe, parameters, conversationId } = await readBody(event) if (!pipe) { return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) } + // Generate conversationId if not provided + const finalConversationId = conversationId || crypto.randomUUID() + const dbPool = event.context.dbPool as Pool return await streamingAgentRequestHandler({ @@ -32,6 +36,7 @@ export default defineEventHandler(async (event): Promise => { projectName, pipe, parameters, + conversationId: finalConversationId, onResponseComplete: dbPool ? async (response) => { const chatRepo = new ChatRepository(dbPool) diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index 3f1166282..0e60880fb 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -43,6 +43,7 @@ export interface PipeInstructions { export interface ChatResponse { id?: string + conversationId?: string userPrompt: string routerResponse: 'pipes' | 'text-to-sql' | 'stop' routerReason: string @@ -66,12 +67,14 @@ export class ChatRepository { router_response, router_reason, pipe_instructions, - sql_query, model, + sql_query, + model, input_tokens, output_tokens, - feedback + feedback, + conversation_id ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) RETURNING id ` @@ -86,6 +89,7 @@ export class ChatRepository { response.inputTokens, response.outputTokens, null, + response.conversationId, ]) return result.rows[0].id From 6fdcb83b35982f44aed43e6d4e76c905ced54b87 Mon Sep 17 00:00:00 2001 From: anilb Date: Mon, 8 Sep 2025 12:40:36 +0200 Subject: [PATCH 02/21] feat: using conversations for history and context Signed-off-by: anilb --- .../copilot/components/copilot-modal.vue | 4 +-- .../copilot/components/copilot-sidebar.vue | 12 ++----- .../components/results/chart-results.vue | 4 +-- .../components/results/results-section.vue | 6 ++-- .../copilot/store/copilot.api.service.ts | 4 +-- .../modules/copilot/types/copilot.types.ts | 8 ++++- frontend/lib/chat/chart/generator.ts | 6 +++- frontend/lib/chat/data-copilot.ts | 15 +++++---- frontend/server/api/chat/chart.ts | 31 +++++++++++++++++-- frontend/server/repo/chat.repo.ts | 12 +++++++ 10 files changed, 72 insertions(+), 30 deletions(-) diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue b/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue index 4b8b7f9a8..9df91ef19 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-modal.vue @@ -80,11 +80,11 @@ const isModalOpen = computed({ } }) -const handleDataUpdate = (id: string, data: MessageData[], routerReasoning?: string) => { +const handleDataUpdate = (id: string, data: 
MessageData[], conversationId?: string) => { resultData.value.push({ id, data, - routerReasoning + conversationId }); diff --git a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue index 8ce9fbc2e..70f407cb2 100644 --- a/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue +++ b/frontend/app/components/shared/modules/copilot/components/copilot-sidebar.vue @@ -124,7 +124,7 @@ const emit = defineEmits<{ (e: 'update:selectedResult', value: string): void; (e: 'update:isLoading', value: boolean): void; (e: 'update:error', value: string): void; - (e: 'update:data', id: string, value: MessageData[], routerReasoning?: string): void; + (e: 'update:data', id: string, value: MessageData[], conversationId?: string): void; }>(); const { copilotDefaults, selectedResultId } = storeToRefs(useCopilotStore()); @@ -192,14 +192,8 @@ const callChatApi = async (userMessage: string) => { } if (message.data) { - // Find router reasoning from the latest router-status message in the conversation - const routerReasoning = messages.value - .slice() - .reverse() - .find(msg => msg.type === 'router-status' && msg.routerReasoning) - ?.routerReasoning; - - emit('update:data', message.id, message.data, routerReasoning); + // Pass the current conversation ID instead of extracting routerReasoning + emit('update:data', message.id, message.data, conversationId.value); selectedResultId.value = message.id; } scrollToEnd(); diff --git a/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue b/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue index 1eeae31c5..9bdf170ef 100644 --- a/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue +++ b/frontend/app/components/shared/modules/copilot/components/results/chart-results.vue @@ -78,7 +78,7 @@ const props = defineProps<{ config: Config | null, isSnapshotModalOpen: boolean, chartErrorType?: ChartErrorType, - routerReasoning?: string + conversationId?: string }>() const isSnapshotModalOpen = computed({ @@ -122,7 +122,7 @@ const generateChart = async () => { isLoading.value = true; - const response = await copilotApiService.callChartApi(props.data, token.value, props.routerReasoning); + const response = await copilotApiService.callChartApi(props.data, token.value, props.conversationId); const data = await response.json(); if (data.config && data.success && data.dataMapping) { diff --git a/frontend/app/components/shared/modules/copilot/components/results/results-section.vue b/frontend/app/components/shared/modules/copilot/components/results/results-section.vue index 2e1161979..7c3bbfc2f 100644 --- a/frontend/app/components/shared/modules/copilot/components/results/results-section.vue +++ b/frontend/app/components/shared/modules/copilot/components/results/results-section.vue @@ -47,7 +47,7 @@ SPDX-License-Identifier: MIT :config="selectedResultConfig" :is-snapshot-modal-open="isSnapshotModalOpen" :chart-error-type="selectedResultChartErrorType" - :router-reasoning="selectedResultRouterReasoning" + :conversation-id="selectedResultConversationId" @update:config="handleConfigUpdate" @update:is-loading="handleChartLoading" @update:is-error="handleChartError" @@ -118,8 +118,8 @@ const selectedResultChartErrorType = computed(() => { return resultData.value.find(result => result.id === selectedResultId.value)?.chartErrorType; }) -const selectedResultRouterReasoning = 
computed(() => { - return resultData.value.find(result => result.id === selectedResultId.value)?.routerReasoning; +const selectedResultConversationId = computed(() => { + return resultData.value.find(result => result.id === selectedResultId.value)?.conversationId; }) const isEmpty = computed(() => { diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index d3a4a91eb..462e97cc4 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -65,13 +65,13 @@ class CopilotApiService { async callChartApi( sampleData: MessageData[], token: string, - routerReasoning?: string, + conversationId?: string, ): Promise { // Prepare the request body with the correct format const requestBody = { results: sampleData, userQuery: 'Generate a chart for this data', - routerReasoning, + conversationId, } // Send streaming request diff --git a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts index 3171c5995..bb5110306 100644 --- a/frontend/app/components/shared/modules/copilot/types/copilot.types.ts +++ b/frontend/app/components/shared/modules/copilot/types/copilot.types.ts @@ -15,7 +15,12 @@ export interface CopilotData { export type CopilotParams = Record; -export type MessagePartType = 'router-status' | 'sql-result' | 'text' | 'pipe-result' | 'chat-response-id' | 'conversation-id'; +export type MessagePartType = 'router-status' | + 'sql-result' | + 'text' | + 'pipe-result' | + 'chat-response-id' | + 'conversation-id'; export type MessageStatus = 'analyzing' | 'complete' | 'error'; export type MessageData = Record; export type MessageRole = 'user' | 'assistant'; @@ -29,6 +34,7 @@ export interface ResultsHistory { chartConfig?: Config | null; chartErrorType?: ChartErrorType; routerReasoning?: string; + conversationId?: string; } export interface AIMessage { diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index f0093ee72..8248b56c0 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -68,7 +68,11 @@ export async function generateChartConfig( output: 'object' as const, schema: outputSchema, system: - 'You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema.', + `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. + Make sure the generated chart configuration answers the user's question and fits the data shape. + ### USER QUESTION + ${routerReasoning} + ### END USER QUESTION`, prompt: createChartGenerationPrompt(dataProfile, normalizedResults, userQuery), temperature: 0.1, }) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 9ee2b3f93..491710a6f 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -143,8 +143,8 @@ export async function streamingAgentRequestHandler({ } return } - // TODO: Remove this once we support text-to-sql + /* else if (routerOutput.next_action === 'create_query') { const fallbackMessage = `I'm unable to answer this question with the widgets I have access. 
But soon I will be able to construct my own queries for these questions if I have access to the necessary data.` @@ -181,6 +181,7 @@ export async function streamingAgentRequestHandler({ } return } + */ dataStream.writeData({ type: 'router-status', @@ -190,16 +191,10 @@ export async function streamingAgentRequestHandler({ }) const followUpTools: Record = {} + if (routerOutput.next_action === "create_query") { followUpTools['text_to_sql'] = tbTools['text_to_sql'] - } else { - for (const toolName of routerOutput.tools) { - if (tbTools[toolName]) { - followUpTools[toolName] = tbTools[toolName] - } - } - if (routerOutput.next_action === "create_query") { const textToSqlOutput = await runTextToSqlAgent({ model, messages, @@ -222,6 +217,10 @@ export async function streamingAgentRequestHandler({ }) } else if (routerOutput.next_action === 'pipes') { + for (const toolName of routerOutput.tools) { + if (tbTools[toolName]) { + followUpTools[toolName] = tbTools[toolName] + } const pipeOutput = await runPipeAgent({ model, messages, diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 487f472ba..9044aa7fe 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -1,18 +1,42 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT +import pg from 'pg' import { generateChartConfig, modifyChartConfig } from '../../../lib/chat/chart/generator' +import { ChatRepository } from '../../repo/chat.repo' import { Result, Config, DataMapping } from '../../../lib/chat/chart/types' import { PipeInstructions } from '~~/lib/chat/types' + export const maxDuration = 30 +// Helper function to get router reasoning from conversation +async function getRouterReasoningFromConversation(conversationId?: string): Promise { + if (!conversationId) return undefined + + try { + const pool = new pg.Pool({ + connectionString: process.env.DATABASE_URL, + }) + + const chatRepo = new ChatRepository(pool) + const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) + + await pool.end() + + return latestResponse?.routerReason || undefined + } catch (error) { + console.error('Error fetching router reasoning from conversation:', error) + return undefined + } +} + interface IChartRequestBody { results?: Result[] userQuery?: string currentConfig?: Config instructions?: string pipeInstructions?: PipeInstructions - routerReasoning?: string + conversationId?: string } interface ChartConfigResponse { @@ -25,9 +49,12 @@ interface ChartConfigResponse { export default defineEventHandler(async (event): Promise => { try { - const { results, userQuery, currentConfig, instructions, pipeInstructions, routerReasoning } = + const { results, userQuery, currentConfig, instructions, pipeInstructions, conversationId } = await readBody(event) + // Get router reasoning from conversation + const routerReasoning = await getRouterReasoningFromConversation(conversationId) + // If pipe instructions are provided, execute them first to get results if (pipeInstructions && !results) { const { executePipeInstructions } = await import('../../../lib/chat/instructions') diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index 0e60880fb..cd2866052 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -118,4 +118,16 @@ export class ChatRepository { const result = await this.pool.query(query, [chatResponseId]) return result.rows.length > 0 ? 
result.rows[0] : null } + + async getLatestChatResponseByConversation(conversationId: string): Promise { + const query = ` + SELECT * FROM chat_responses + WHERE conversation_id = $1 + ORDER BY created_at DESC + LIMIT 1 + ` + + const result = await this.pool.query(query, [conversationId]) + return result.rows.length > 0 ? result.rows[0] : null + } } From 640af08b39930b4a4add7e494b8b1c2c40bf1aae Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:17:29 +0200 Subject: [PATCH 03/21] chore: refactored data-copilot for readability, conversation history for all agents Signed-off-by: anilb --- ..._alterChatResponsesRouterResponseCheck.sql | 6 + .../copilot/store/copilot.api.service.ts | 2 +- frontend/lib/chat/agents/base-agent.ts | 36 +- frontend/lib/chat/agents/index.ts | 7 + frontend/lib/chat/agents/pipe.ts | 10 - frontend/lib/chat/agents/router.ts | 32 - frontend/lib/chat/agents/text-to-sql.ts | 9 - frontend/lib/chat/chart/analysis.ts | 29 +- frontend/lib/chat/chart/generator.ts | 65 +- frontend/lib/chat/data-copilot.ts | 735 ++++++++++++------ frontend/lib/chat/enums.ts | 19 + frontend/lib/chat/types.ts | 44 +- frontend/nuxt.config.ts | 6 + frontend/server/api/chat/feedback/[id].ts | 6 +- frontend/server/api/chat/stream.ts | 39 +- frontend/server/middleware/database.ts | 5 +- frontend/server/repo/chat.repo.ts | 3 +- frontend/server/repo/insightsProjects.repo.ts | 67 ++ frontend/server/utils/db.ts | 30 +- 19 files changed, 780 insertions(+), 370 deletions(-) create mode 100644 database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql create mode 100644 frontend/lib/chat/agents/index.ts create mode 100644 frontend/lib/chat/enums.ts create mode 100644 frontend/server/repo/insightsProjects.repo.ts diff --git a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql new file mode 100644 index 000000000..7c6044389 --- /dev/null +++ b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql @@ -0,0 +1,6 @@ +-- Drop the existing check constraint +ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; + +-- Add the new check constraint with 'create_query' instead of 'text-to-sql' +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IN ('pipes', 'create_query', 'stop')); \ No newline at end of file diff --git a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts index 462e97cc4..f571d3773 100644 --- a/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts +++ b/frontend/app/components/shared/modules/copilot/store/copilot.api.service.ts @@ -40,7 +40,7 @@ class CopilotApiService { content: m.content })), pipe, - segmentId: project?.id, + projectSlug: project?.slug, projectName: project?.name, parameters, conversationId diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index e5830efe4..0e1be535c 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -14,10 +14,8 @@ export abstract class BaseAgent { abstract readonly temperature: number abstract readonly maxSteps: number - protected getConversationHistory( - input: { messages: ChatMessage[] } & T, - ): string { - const userMessages = input.messages.filter((m) => m.role === 'user') + protected getConversationHistory(messages: 
ChatMessage[]): string { + const userMessages = messages.filter((m) => m.role === 'user') if (userMessages.length > 1) { return JSON.stringify(userMessages.slice(0, -1), null, 2) } @@ -91,7 +89,7 @@ export abstract class BaseAgent { return 'value' } - async execute(input: TInput): Promise { + async execute(input: TInput & { messages: ChatMessage[] }): Promise { try { const systemPrompt = await this.getSystemPrompt(input) const userPrompt = this.getUserPrompt(input) @@ -99,7 +97,7 @@ export abstract class BaseAgent { // Append JSON format instructions to system prompt const jsonInstructions = this.generateJSONInstructions() - const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input) + const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input.messages) const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + jsonInstructions @@ -150,6 +148,7 @@ export abstract class BaseAgent { usage: response.usage, } } catch (error) { + // TODO:: Potentially a good place to catch zod validation errors and retryinh throw this.createError(error) } } @@ -189,10 +188,33 @@ export abstract class BaseAgent { } } + protected generateConversationHistoryReceipt(messages: ChatMessage[]): string { + try { + const conversationHistory = this.getConversationHistory(messages) + + if (!conversationHistory || conversationHistory.trim() === '') { + return '' + } + + return ` + + ## CONVERSATION HISTORY (FOR CONTEXT ONLY) + + The following is the conversation history leading up to the current question. \n\n + Use this ONLY for context and understanding. Do NOT attempt to answer previous questions. + + ${conversationHistory} + + ## END OF CONVERSATION HISTORY` + } catch (error) { + console.error('Error generating conversation history context', error) + return '' + } + } + protected abstract getModel(input: TInput): any protected abstract getSystemPrompt(input: TInput): string | Promise protected abstract getUserPrompt(input: TInput): string - protected abstract generateConversationHistoryReceipt(input: TInput): string protected abstract getTools(input: TInput): Record protected abstract createError(error: unknown): Error diff --git a/frontend/lib/chat/agents/index.ts b/frontend/lib/chat/agents/index.ts new file mode 100644 index 000000000..4091c8ac4 --- /dev/null +++ b/frontend/lib/chat/agents/index.ts @@ -0,0 +1,7 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. 
+// SPDX-License-Identifier: MIT +import { PipeAgent } from './pipe' +import { RouterAgent } from './router' +import { TextToSqlAgent } from './text-to-sql' + +export { PipeAgent, RouterAgent, TextToSqlAgent } diff --git a/frontend/lib/chat/agents/pipe.ts b/frontend/lib/chat/agents/pipe.ts index a2dcc8633..a0bb805fc 100644 --- a/frontend/lib/chat/agents/pipe.ts +++ b/frontend/lib/chat/agents/pipe.ts @@ -40,10 +40,6 @@ export class PipeAgent extends BaseAgent { return input.tools } - protected generateConversationHistoryReceipt(_input: PipeAgentInput): string { - return '' - } - protected createError(error: unknown): Error { if (error instanceof Error) { return new Error(`Pipe agent error: ${error.message}`) @@ -51,9 +47,3 @@ export class PipeAgent extends BaseAgent { return new Error(`Pipe agent error: ${String(error)}`) } } - -// Convenience function to maintain backward compatibility -export async function runPipeAgent(params: PipeAgentInput): Promise { - const agent = new PipeAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/agents/router.ts b/frontend/lib/chat/agents/router.ts index d04d0afdb..d7a7a01fc 100644 --- a/frontend/lib/chat/agents/router.ts +++ b/frontend/lib/chat/agents/router.ts @@ -1,8 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import type { RouterOutput, RouterAgentInput } from '../types' import { routerOutputSchema } from '../types' import { routerPrompt } from '../prompts/router' @@ -34,30 +32,6 @@ export class RouterAgent extends BaseAgent { return '' } - protected generateConversationHistoryReceipt(input: RouterAgentInput): string { - try { - const conversationHistory = this.getConversationHistory(input) - - if (!conversationHistory || conversationHistory.trim() === '') { - return '' - } - - return ` - - ## CONVERSATION HISTORY (FOR CONTEXT ONLY) - - The following is the conversation history leading up to the current question. \n\n - Use this ONLY for context and understanding. Do NOT attempt to answer previous questions. 
- - ${conversationHistory} - - ## END OF CONVERSATION HISTORY` - } catch (error) { - console.error('Error generating conversation history context', error) - return '' - } - } - protected getTools(input: RouterAgentInput): Record { // Only allow calling list_datasources; all other tools remain visible in prompt via toolsOverview const allowed: Record = {} @@ -74,9 +48,3 @@ export class RouterAgent extends BaseAgent { return new Error(`Router agent error: ${String(error)}`) } } - -// Convenience function to maintain backward compatibility -export async function runRouterAgent(params: RouterAgentInput): Promise { - const agent = new RouterAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index e3c02f463..9cce9b4a5 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -75,16 +75,7 @@ export class TextToSqlAgent extends BaseAgent { } } - protected generateConversationHistoryReceipt(_input: TextToSqlAgentInput): string { - return '' - } - protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { return true // Enable tool call monitoring for SQL agent } } - -export async function runTextToSqlAgent(params: TextToSqlAgentInput): Promise { - const agent = new TextToSqlAgent() - return agent.execute(params) -} diff --git a/frontend/lib/chat/chart/analysis.ts b/frontend/lib/chat/chart/analysis.ts index 37050c299..6df2ebe5b 100644 --- a/frontend/lib/chat/chart/analysis.ts +++ b/frontend/lib/chat/chart/analysis.ts @@ -188,12 +188,25 @@ export function pivotLongToWide( function detectLeaderboardFromReasoning(routerReasoning?: string): boolean { if (!routerReasoning) return false - + const leaderboardKeywords = [ - 'leaderboard', 'ranking', 'rank', 'top', 'leading', 'highest', 'lowest', - 'leaders', 'ranked', 'ranking', 'position', 'standings', 'table', 'best', 'worst' + 'leaderboard', + 'ranking', + 'rank', + 'top', + 'leading', + 'highest', + 'lowest', + 'leaders', + 'ranked', + 'ranking', + 'position', + 'standings', + 'table', + 'best', + 'worst', ] - + const reasoningText = routerReasoning.toLowerCase() return leaderboardKeywords.some((keyword) => reasoningText.includes(keyword)) } @@ -220,7 +233,11 @@ function prioritizeMetricForLeaderboard(numericColumns: ColumnProfile[]): string return numericColumns[0]?.name || null } -function detectComparisonScenario(columns: ColumnProfile[], userQuestion: string, routerReasoning?: string) { +function detectComparisonScenario( + columns: ColumnProfile[], + userQuestion: string, + routerReasoning?: string, +) { const numericColumns = columns.filter((c) => c.type === 'numeric') const columnNames = columns.map((c) => c.name.toLowerCase()) @@ -240,7 +257,7 @@ function detectComparisonScenario(columns: ColumnProfile[], userQuestion: string type: 'leaderboard' as const, primaryColumns: [primaryMetric], secondaryColumns: secondaryMetrics, - } + }, } } } diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index 8248b56c0..1117af639 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -20,7 +20,11 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) -export type ChartConfig = { config: Config | null; dataMapping: DataMapping[] | null; isMetric?: boolean } +export type ChartConfig = { + config: Config | null + dataMapping: DataMapping[] | null + isMetric?: boolean +} // Color arrays for different 
chart types and data point counts const chartColors = { @@ -67,8 +71,7 @@ export async function generateChartConfig( model, output: 'object' as const, schema: outputSchema, - system: - `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. + system: `You are a data visualization expert. Create simple, effective chart configurations using the apache echarts configuration schema. Make sure the generated chart configuration answers the user's question and fits the data shape. ### USER QUESTION ${routerReasoning} @@ -97,9 +100,9 @@ export async function generateChartConfig( chartConfig.yAxis.inverse = true if (chartConfig.grid) { - chartConfig.grid.left = "0.2%" + chartConfig.grid.left = '0.2%' } - chartConfig.series.map((s) => s.seriesLayoutBy = 'column') + chartConfig.series.map((s) => (s.seriesLayoutBy = 'column')) } // Apply default colors if not already set @@ -292,7 +295,7 @@ function generateFallbackConfig(profile: any): Config { }, ...(type !== 'pie' && { // For leaderboard, swap axes to create horizontal bars - xAxis: isLeaderboard + xAxis: isLeaderboard ? { type: 'value', name: yKeys.length === 1 ? yKeys[0] : 'Value', @@ -340,22 +343,33 @@ function generateFallbackConfig(profile: any): Config { axisTick: { show: false }, } : useDualAxis - ? [ - { - type: 'value', - name: primaryKeys.join(' / '), - position: 'left', - axisLabel: { - fontSize: 12, - fontWeight: 'normal', - color: lfxColors.neutral[400], - fontFamily: 'Inter', + ? [ + { + type: 'value', + name: primaryKeys.join(' / '), + position: 'left', + axisLabel: { + fontSize: 12, + fontWeight: 'normal', + color: lfxColors.neutral[400], + fontFamily: 'Inter', + }, }, - }, - { + { + type: 'value', + name: secondaryKeys.join(' / '), + position: 'right', + axisLabel: { + fontSize: 12, + fontWeight: 'normal', + color: lfxColors.neutral[400], + fontFamily: 'Inter', + }, + }, + ] + : { type: 'value', - name: secondaryKeys.join(' / '), - position: 'right', + name: yKeys.length === 1 ? yKeys[0] : 'Value', axisLabel: { fontSize: 12, fontWeight: 'normal', @@ -363,17 +377,6 @@ function generateFallbackConfig(profile: any): Config { fontFamily: 'Inter', }, }, - ] - : { - type: 'value', - name: yKeys.length === 1 ? yKeys[0] : 'Value', - axisLabel: { - fontSize: 12, - fontWeight: 'normal', - color: lfxColors.neutral[400], - fontFamily: 'Inter', - }, - }, grid: { left: '8%', right: useDualAxis ? '15%' : '8%', diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 491710a6f..03033230b 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,18 +1,29 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. 
-// SPDX-License-Identifier: MIT import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' -import { experimental_createMCPClient as createMCPClient, createDataStreamResponse } from 'ai' +import { + experimental_createMCPClient as createMCPClient, + createDataStreamResponse, + type LanguageModelV1, +} from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' +import type { Pool } from 'pg' import type { ChatResponse } from '../../server/repo/chat.repo' +import { ChatRepository } from '../../server/repo/chat.repo' -import { runRouterAgent } from './agents/router' -import { runTextToSqlAgent } from './agents/text-to-sql'; -import { runPipeAgent } from './agents/pipe' +import { TextToSqlAgent, PipeAgent, RouterAgent } from './agents' import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' -import type { ChatMessage } from './types' +import type { + DataCopilotQueryInput, + PipeAgentInput, + PipeAgentStreamInput, + PipeInstructions, + RouterAgentInput, + TextToSqlAgentInput, + TextToSqlAgentStreamInput, +} from './types' +import { RouterDecisionAction, StreamDataStatus, StreamDataType } from './enums' const bedrock = createAmazonBedrock({ accessKeyId: process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID, @@ -20,277 +31,503 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) -export async function streamingAgentRequestHandler({ - messages, - segmentId, - projectName, - pipe, - parameters, - conversationId, - onResponseComplete, -}: { - messages: ChatMessage[] - segmentId?: string - projectName?: string - pipe: string - parameters?: Record - conversationId?: string - onResponseComplete?: (response: ChatResponse) => Promise -}): Promise { - const url = new URL( - `https://mcp.tinybird.co?token=${process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN}&host=${process.env.NUXT_TINYBIRD_BASE_URL}`, - ) - - const mcpClient = await createMCPClient({ - transport: new StreamableHTTPClientTransport(url, { - sessionId: `session_${Date.now()}`, - }), - }) - - const MODEL = 'us.anthropic.claude-sonnet-4-20250514-v1:0' - - const tbTools = await mcpClient.tools({}) - // Build a human-readable overview of all tools for the router's prompt (read-only catalog) - const excludedFromOverview = new Set([ - 'explore_data', - 'execute_query', - 'text_to_sql', - 'list_endpoints', - 'list_service_datasources', - ]) - const toolsOverview = Object.entries(tbTools) - .filter(([name]) => !excludedFromOverview.has(name)) - .map(([name, def]: [string, any]) => { - try { - const description = def?.description || def?.meta?.description || '' - const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined - const params = inputSchema ? JSON.stringify(inputSchema, null, 2) : undefined - return [`- ${name}: ${description}`, params ? 
` params: ${params}` : undefined] - .filter(Boolean) - .join('\n') - } catch { - return `- ${name}` - } - }) - .join('\n') - const parametersString = JSON.stringify(parameters || {}) - const dateString = new Date().toISOString().split('T')[0] - const model = bedrock(MODEL) - - return createDataStreamResponse({ - execute: async (dataStream) => { - const responseData = { - question: messages[messages.length - 1]?.content || '', - answer: '', - reasoning: '', - explanation: '', - data: null as any, - inputTokens: 0, - outputTokens: 0, - } +export class DataCopilot { + /** MCP client for communicating with Tinybird services */ + private mcpClient: any - try { - dataStream.writeData({ - type: 'router-status', - status: 'analyzing', - }) - - const routerOutput = await runRouterAgent({ - model, - messages, - tools: tbTools, - toolsOverview, - date: dateString as string, - projectName: projectName as string, - pipe, - parametersString, - segmentId: segmentId as string, - }) - // Accumulate token usage from router - if (routerOutput.usage) { - responseData.inputTokens += routerOutput.usage.promptTokens || 0 - responseData.outputTokens += routerOutput.usage.completionTokens || 0 - } + /** Available Tinybird tools loaded from MCP server */ + private tbTools: Record = {} - if (routerOutput.next_action === 'stop') { - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` - responseData.answer = routerOutput.reasoning - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: routerOutput.reasoning, - }) + /** Human-readable overview of tools for router agent decision making */ + private toolsOverview: string = '' - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'stop', - routerReason: routerOutput.reasoning, - pipeInstructions: undefined, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + /** Tinybird MCP server URL */ + private tbMcpUrl: string = '' - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', - }) - } - return - } - // TODO: Remove this once we support text-to-sql - /* - else if (routerOutput.next_action === 'create_query') { - const fallbackMessage = `I'm unable to answer this question with the widgets I have access. 
- But soon I will be able to construct my own queries for these questions if I have access to the necessary data.` - - responseData.answer = fallbackMessage - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\n - Reasoning: ${routerOutput.reasoning}\n - Fallback: Text-to-SQL not yet supported` - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: fallbackMessage, - }) + /** Amazon Bedrock language model instance */ + private model: LanguageModelV1 - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'text-to-sql', - routerReason: routerOutput.reasoning, - pipeInstructions: undefined, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + /** Bedrock model identifier */ + private readonly BEDROCK_MODEL_ID = 'us.anthropic.claude-sonnet-4-20250514-v1:0' - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', - }) - } - return + constructor() { + this.model = bedrock(this.BEDROCK_MODEL_ID) + this.tbMcpUrl = `https://mcp.tinybird.co?token=${process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN}&host=${process.env.NUXT_TINYBIRD_BASE_URL}` + } + + /** + * Initialize MCP client connection and load Tinybird tools + */ + async initialize(): Promise { + const url = new URL(this.tbMcpUrl) + + this.mcpClient = await createMCPClient({ + transport: new StreamableHTTPClientTransport(url, { + sessionId: `session_${Date.now()}`, + }), + }) + + this.tbTools = await this.mcpClient.tools({}) + this.buildToolsOverview() + } + + /** + * Build human-readable overview of available tools for the router agent + */ + private buildToolsOverview(): void { + const excludedFromOverview = new Set([ + 'explore_data', + 'execute_query', + 'text_to_sql', + 'list_endpoints', + 'list_service_datasources', + ]) + + this.toolsOverview = Object.entries(this.tbTools) + .filter(([name]) => !excludedFromOverview.has(name)) + .map(([name, def]: [string, any]) => { + try { + const description = def?.description || def?.meta?.description || '' + const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined + const params = inputSchema ? JSON.stringify(inputSchema, null, 2) : undefined + return [`- ${name}: ${description}`, params ? ` params: ${params}` : undefined] + .filter(Boolean) + .join('\n') + } catch { + return `- ${name}` } - */ + }) + .join('\n') + } - dataStream.writeData({ - type: 'router-status', - status: 'complete', - reasoning: routerOutput.reasoning, - reformulatedQuestion: routerOutput.reformulated_question, - }) + /** + * Save chat response to database + */ + private async saveChatResponse( + response: ChatResponse, + insightsDbPool: Pool, + userEmail: string, + ): Promise { + const chatRepo = new ChatRepository(insightsDbPool) + return await chatRepo.saveChatResponse(response, userEmail) + } - const followUpTools: Record = {} + /** + * Executes the router agent to analyze user queries and determine the optimal processing strategy. + * The router acts as the decision-making component that routes requests to either SQL generation + * or data pipeline processing based on query complexity and intent. 
+ * + * @param messages - User conversation history providing context for the query + * @param date - Current date string for time-based query filtering + * @param projectName - Project identifier for data scoping and access control + * @param pipe - Main data endpoint or pipeline identifier + * @param parametersString - Additional query parameters serialized as JSON + * @param segmentId - Data segment filter for multi-tenant data access + * @returns Router decision with next action, reasoning, and selected tools + */ + private async runRouterAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + }: Omit) { + const agent = new RouterAgent() + return agent.execute({ + model: this.model, + messages, + tools: this.tbTools, + toolsOverview: this.toolsOverview, + date, + projectName, + pipe, + parametersString, + segmentId, + }) + } - if (routerOutput.next_action === "create_query") { - followUpTools['text_to_sql'] = tbTools['text_to_sql'] + /** + * Executes the text-to-SQL agent to convert natural language questions into executable SQL queries. + * This agent understands database schemas, applies proper filtering, and generates optimized queries + * for direct data access when users need raw data rather than processed analytics. + * + * @param messages - Original conversation context for understanding query intent + * @param date - Current date for constructing time-based WHERE conditions + * @param projectName - Project context for database table scoping + * @param pipe - Data source identifier for table selection + * @param parametersString - Additional query parameters for filtering + * @param segmentId - Segment identifier for multi-tenant data filtering + * @param reformulatedQuestion - Clarified question from router agent for better SQL generation + * @returns SQL query string with explanation and token usage metrics + */ + private async runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }: TextToSqlAgentInput) { + const followUpTools: Record = {} + followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] + followUpTools['list_datasources'] = this.tbTools['list_datasources'] - const textToSqlOutput = await runTextToSqlAgent({ - model, - messages, - tools: followUpTools, - date: dateString as string, - projectName: projectName as string, - pipe, - parametersString, - segmentId: segmentId as string, - reformulatedQuestion: routerOutput.reformulated_question, - }) - // Execute the SQL query according to the instructions - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - + const agent = new TextToSqlAgent() + return agent.execute({ + model: this.model, + messages, + tools: followUpTools, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + } + + /** + * Executes the pipe agent to generate tinybird pipeline instructions. + * This agent designs multi-step workflows that use one or more tinybird pipes. + * Each pipe is used to answer specific parts of the user's analytical question. 
+ * + * @param messages - Original conversation context for understanding analytical requirements + * @param date - Current date for time-based data filtering in pipeline steps + * @param projectName - Project identifier for data access and pipeline scoping + * @param pipe - Primary pipeline identifier for data source selection + * @param parametersString - Additional processing parameters for pipeline configuration + * @param segmentId - Segment filter for multi-tenant pipeline execution + * @param reformulatedQuestion - Refined analytical question from router agent + * @param toolNames - Selected Tinybird tools for pipeline construction (e.g., aggregation, transformation tools) + * @returns Pipeline instructions with processing steps, column definitions, and explanation + */ + private async runPipeAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + }: Omit) { + const followUpTools: Record = {} + for (const toolName of toolNames) { + if (this.tbTools[toolName]) { + followUpTools[toolName] = this.tbTools[toolName] + } + } + const agent = new PipeAgent() + return agent.execute({ + model: this.model, + messages, + tools: followUpTools, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + }) + } + + /** + * Main streaming handler that orchestrates the entire AI agent workflow + */ + async streamingAgentRequestHandler({ + messages, + segmentId, + projectName, + pipe, + parameters, + conversationId, + insightsDbPool, + userEmail, + }: DataCopilotQueryInput): Promise { + const parametersString = JSON.stringify(parameters || {}) + const date = new Date().toISOString().slice(0, 10) + + return createDataStreamResponse({ + execute: async (dataStream) => { + const responseData: ChatResponse = { + userPrompt: messages[messages.length - 1]?.content || '', + inputTokens: 0, + outputTokens: 0, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId || '', + routerResponse: RouterDecisionAction.STOP, + routerReason: '', + pipeInstructions: undefined as PipeInstructions | undefined, + sqlQuery: undefined as string | undefined, + } + + try { dataStream.writeData({ - type: "sql-result", - explanation: textToSqlOutput.explanation, - instructions: textToSqlOutput.instructions, - data: queryData + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.ANALYZING, }) - } else if (routerOutput.next_action === 'pipes') { - for (const toolName of routerOutput.tools) { - if (tbTools[toolName]) { - followUpTools[toolName] = tbTools[toolName] - } - const pipeOutput = await runPipeAgent({ - model, + const routerOutput = await this.runRouterAgent({ messages, - tools: followUpTools, - date: dateString as string, + date, projectName: projectName as string, pipe, parametersString, segmentId: segmentId as string, - reformulatedQuestion: routerOutput.reformulated_question, - toolNames: routerOutput.tools, }) - // Accumulate token usage from pipe agent - if (pipeOutput.usage) { - responseData.inputTokens += pipeOutput.usage.promptTokens || 0 - responseData.outputTokens += pipeOutput.usage.completionTokens || 0 + // Accumulate token usage from router + if (routerOutput.usage) { + responseData.inputTokens += routerOutput.usage.promptTokens || 0 + responseData.outputTokens += routerOutput.usage.completionTokens || 0 } - // Execute the pipes according to the instructions and combine results - const combinedData = await executePipeInstructions(pipeOutput.instructions) - - responseData.explanation 
= pipeOutput.explanation - responseData.answer = pipeOutput.explanation - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\n - Router Reasoning: ${routerOutput.reasoning}\n - Tools Selected: ${routerOutput.tools ? routerOutput.tools.join(', ') : 'none'}\n - Reformulated Question: ${routerOutput.reformulated_question}\n - Pipe Agent Explanation: ${pipeOutput.explanation}` - - responseData.data = combinedData + if (routerOutput.next_action === RouterDecisionAction.STOP) { + await this.handleStopAction( + messages[messages.length - 1]?.content || '', + routerOutput, + responseData, + dataStream, + insightsDbPool, + userEmail, + conversationId, + ) + return + } dataStream.writeData({ - type: 'pipe-result', - explanation: pipeOutput.explanation, - instructions: pipeOutput.instructions, - data: combinedData, + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.COMPLETE, + reasoning: routerOutput.reasoning, + reformulatedQuestion: routerOutput.reformulated_question, }) - if (onResponseComplete) { - const chatResponseId = await onResponseComplete({ - userPrompt: responseData.question, - inputTokens: responseData.inputTokens, - outputTokens: responseData.outputTokens, - routerResponse: 'pipes', - routerReason: routerOutput.reasoning, - pipeInstructions: pipeOutput.instructions, - sqlQuery: undefined, - model: MODEL, - conversationId: conversationId, - }) + let sqlQuery: string | undefined = undefined + let pipeInstructions: PipeInstructions | undefined = undefined - // Stream the chat response ID and conversation ID - dataStream.writeData({ - type: 'chat-response-id', - id: chatResponseId, - conversationId: conversationId || '', + if (routerOutput.next_action === RouterDecisionAction.CREATE_QUERY) { + const result = await this.handleCreateQueryAction({ + messages, + date, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + dataStream, + }) + sqlQuery = result.sqlQuery + } else if (routerOutput.next_action === RouterDecisionAction.PIPES) { + const result = await this.handlePipesAction({ + messages, + date, + projectName: projectName as string, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion: routerOutput.reformulated_question, + toolNames: routerOutput.tools, + dataStream, + responseData, + routerOutput, }) + pipeInstructions = result.pipeInstructions } + + await this.handleResponseComplete({ + userPrompt: messages[messages.length - 1]?.content || '', + responseData, + routerOutput, + pipeInstructions, + sqlQuery, + conversationId, + insightsDbPool, + userEmail, + dataStream, + }) + } catch (error) { + dataStream.writeData({ + type: 'router-status', + status: 'error', + error: error instanceof Error ? error.message : 'An error occurred', + }) + throw error } - } - } catch (error) { - dataStream.writeData({ - type: 'router-status', - status: 'error', - error: error instanceof Error ? 
error.message : 'An error occurred', - }) - - throw error - } - }, - }) + }, + }) + } + + /** + * Handle router 'stop' action - send final response without further processing + */ + private async handleStopAction( + userPrompt: string, + routerOutput: any, + responseData: any, + dataStream: any, + insightsDbPool: Pool, + userEmail: string, + conversationId?: string, + ): Promise { + responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` + responseData.answer = routerOutput.reasoning + + dataStream.writeData({ + type: StreamDataType.ROUTER_STATUS, + status: StreamDataStatus.COMPLETE, + reasoning: routerOutput.reasoning, + }) + + const chatResponseId = await this.saveChatResponse( + { + userPrompt, + inputTokens: responseData.inputTokens, + outputTokens: responseData.outputTokens, + routerResponse: RouterDecisionAction.STOP, + routerReason: routerOutput.reasoning, + pipeInstructions: undefined, + sqlQuery: undefined, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId, + }, + insightsDbPool, + userEmail, + ) + + dataStream.writeData({ + type: StreamDataType.CHAT_RESPONSE_ID, + id: chatResponseId, + conversationId: conversationId || '', + }) + } + + /** + * Handle router 'create_query' action - generate and execute SQL query + */ + private async handleCreateQueryAction({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + dataStream, + }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { + const textToSqlOutput = await this.runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.SQL_RESULT, + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData, + }) + + return { sqlQuery: textToSqlOutput.instructions } + } + + /** + * Handle router 'pipes' action - generate and execute pipe instructions + */ + private async handlePipesAction({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + toolNames, + dataStream, + responseData, + }: PipeAgentStreamInput): Promise<{ pipeInstructions: PipeInstructions }> { + const pipeOutput = await this.runPipeAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId: segmentId as string, + reformulatedQuestion, + toolNames, + }) + + // Accumulate token usage from pipe agent + if (pipeOutput.usage) { + responseData.inputTokens += pipeOutput.usage.promptTokens || 0 + responseData.outputTokens += pipeOutput.usage.completionTokens || 0 + } + + // Execute the pipes according to the instructions and combine results + const combinedData = await executePipeInstructions(pipeOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.PIPE_RESULT, + explanation: pipeOutput.explanation, + instructions: pipeOutput.instructions, + data: combinedData, + }) + + return { pipeInstructions: pipeOutput.instructions } + } + + /** + * Save final response to database and stream chat response ID + */ + private async handleResponseComplete({ + userPrompt, + responseData, + routerOutput, + pipeInstructions, + sqlQuery, + conversationId, + insightsDbPool, + userEmail, + dataStream, + }: { + userPrompt: string + responseData: ChatResponse + routerOutput: any + pipeInstructions?: PipeInstructions + sqlQuery?: string + 
conversationId?: string + insightsDbPool: Pool + userEmail: string + dataStream: any + }): Promise { + const chatResponseId = await this.saveChatResponse( + { + userPrompt, + inputTokens: responseData.inputTokens, + outputTokens: responseData.outputTokens, + routerResponse: routerOutput.next_action, + routerReason: routerOutput.reasoning, + pipeInstructions, + sqlQuery, + model: this.BEDROCK_MODEL_ID, + conversationId: conversationId, + }, + insightsDbPool, + userEmail, + ) + + dataStream.writeData({ + type: StreamDataType.CHAT_RESPONSE_ID, + id: chatResponseId, + conversationId: conversationId || '', + }) + } } diff --git a/frontend/lib/chat/enums.ts b/frontend/lib/chat/enums.ts new file mode 100644 index 000000000..32192e973 --- /dev/null +++ b/frontend/lib/chat/enums.ts @@ -0,0 +1,19 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. +// SPDX-License-Identifier: MIT +export enum StreamDataType { + ROUTER_STATUS = 'router-status', + CHAT_RESPONSE_ID = 'chat-response-id', + SQL_RESULT = 'sql-result', + PIPE_RESULT = 'pipe-result', +} + +export enum StreamDataStatus { + ANALYZING = 'analyzing', + COMPLETE = 'complete', +} + +export enum RouterDecisionAction { + STOP = 'stop', + CREATE_QUERY = 'create_query', + PIPES = 'pipes', +} diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index 557108969..5f155e1f6 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -1,7 +1,10 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ +import type { DataStreamWriter } from 'ai' +import type { Pool } from 'pg' import { z } from 'zod' +import type { ChatResponse } from '~~/server/repo/chat.repo' // ============================================ // Pipe Instruction Types @@ -109,7 +112,7 @@ export type PipeOutput = z.infer & { usage?: any } // ============================================ export interface ChatMessage { - content: string, + content: string role: string } @@ -125,6 +128,13 @@ export interface RouterAgentInput { segmentId: string | null } +export interface PipeAgentStreamInput extends Omit { + dataStream: DataStreamWriter + date: string + responseData: ChatResponse + routerOutput: RouterOutput +} + export interface PipeAgentInput { model: any // Bedrock model instance messages: ChatMessage[] @@ -137,3 +147,35 @@ export interface PipeAgentInput { reformulatedQuestion: string toolNames: string[] // Array of tool names from router } + +export interface DataCopilotQueryInput { + messages: ChatMessage[] + segmentId?: string + projectName?: string + pipe: string + parameters?: Record + conversationId?: string + insightsDbPool: Pool + userEmail: string +} + +export interface TextToSqlAgentInput { + messages: ChatMessage[] + date: string + projectName: string + pipe: string + parametersString: string + segmentId: string + reformulatedQuestion: string +} + +export interface TextToSqlAgentStreamInput { + messages: ChatMessage[] + date: string + projectName: string + pipe: string + parametersString: string + segmentId: string + reformulatedQuestion: string + dataStream: any +} diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index 9801729f5..51d22f51a 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -54,6 +54,12 @@ export default defineNuxtConfig({ insightsDbUsername: "postgres", insightsDbPassword: "example", insightsDbDatabase: "insights", + cmDbWriteHost: "localhost", + cmDbReadHost: "localhost", + cmDbPort: 
5432, + cmDbUsername: "postgres", + cmDbPassword: "example", + cmDbDatabase: "crowd-web", // These are also exposed on the client-side public: { apiBase: "/api", diff --git a/frontend/server/api/chat/feedback/[id].ts b/frontend/server/api/chat/feedback/[id].ts index 293032180..5a0a00d1c 100644 --- a/frontend/server/api/chat/feedback/[id].ts +++ b/frontend/server/api/chat/feedback/[id].ts @@ -29,12 +29,12 @@ export default defineEventHandler(async (event): Promise @@ -18,31 +18,44 @@ interface IStreamRequestBody { export default defineEventHandler(async (event): Promise => { try { - const { messages, segmentId, projectName, pipe, parameters, conversationId } = + const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) if (!pipe) { return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) } + if (!projectSlug) { + return createError({ statusCode: 400, statusMessage: 'Project slug is required' }) + } + // Generate conversationId if not provided const finalConversationId = conversationId || crypto.randomUUID() - const dbPool = event.context.dbPool as Pool + const insightsDbPool = event.context.insightsDbPool as Pool + const cmDbPool = event.context.cmDbPool as Pool + + // find project by slug to get the segmentId + const insightsProjectsRepo = new InsightsProjectsRepository(cmDbPool) + + const insightsProjects = await insightsProjectsRepo.findInsightsProjectsBySlug(projectSlug) + + if (!insightsProjects) { + return createError({ statusCode: 404, statusMessage: 'Project not found' }) + } + + const dataCopilot = new DataCopilot() + await dataCopilot.initialize() - return await streamingAgentRequestHandler({ + return dataCopilot.streamingAgentRequestHandler({ messages, - segmentId, + segmentId: insightsProjects.segmentId, projectName, pipe, parameters, conversationId: finalConversationId, - onResponseComplete: dbPool - ? async (response) => { - const chatRepo = new ChatRepository(dbPool) - return await chatRepo.saveChatResponse(response, event.context.user.email) - } - : undefined, + insightsDbPool, + userEmail: event.context.user.email, }) } catch (error) { return createError({ diff --git a/frontend/server/middleware/database.ts b/frontend/server/middleware/database.ts index 45b5fda64..6f4dfcb6e 100644 --- a/frontend/server/middleware/database.ts +++ b/frontend/server/middleware/database.ts @@ -1,11 +1,12 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT -import { getDbPool } from '../utils/db'; +import { getInsightsDbPool, getCMDbPool } from '../utils/db'; export default defineEventHandler(async (event) => { // Only apply to chat endpoints if (event.node.req.url?.startsWith('/api/chat/')) { // Add the database pool to the event context - event.context.dbPool = getDbPool(); + event.context.insightsDbPool = getInsightsDbPool(); + event.context.cmDbPool = getCMDbPool(); } }); \ No newline at end of file diff --git a/frontend/server/repo/chat.repo.ts b/frontend/server/repo/chat.repo.ts index cd2866052..da56fe989 100644 --- a/frontend/server/repo/chat.repo.ts +++ b/frontend/server/repo/chat.repo.ts @@ -1,6 +1,7 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT import type { Pool } from 'pg' +import { RouterDecisionAction } from '~~/lib/chat/enums' export interface PipeInput { endDate?: string @@ -45,7 +46,7 @@ export interface ChatResponse { id?: string conversationId?: string userPrompt: string - routerResponse: 'pipes' | 'text-to-sql' | 'stop' + routerResponse: RouterDecisionAction routerReason: string pipeInstructions?: PipeInstructions sqlQuery?: string diff --git a/frontend/server/repo/insightsProjects.repo.ts b/frontend/server/repo/insightsProjects.repo.ts new file mode 100644 index 000000000..986cde312 --- /dev/null +++ b/frontend/server/repo/insightsProjects.repo.ts @@ -0,0 +1,67 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. +// SPDX-License-Identifier: MIT +import type { Pool } from 'pg' + +export interface InsightsProject { + id: string + name: string + description?: string + segmentId: string + createdAt: string + updatedAt: string + logoUrl?: string + organizationId: string + website?: string + github?: string + linkedin?: string + twitter?: string + widgets: string[] + repositories: string[] + slug: string + enabled: boolean + isLF: boolean + keywords: string[] + searchKeywords: string[] + deletedAt?: string +} + +export class InsightsProjectsRepository { + constructor(private pool: Pool) {} + + async findInsightsProjectsBySlug(slug: string): Promise { + try { + const query = ` + SELECT + id, + name, + description, + "segmentId", + "createdAt", + "updatedAt", + "logoUrl", + "organizationId", + website, + github, + linkedin, + twitter, + widgets, + repositories, + slug, + enabled, + "isLF", + keywords, + "searchKeywords", + "deletedAt" + FROM "insightsProjects" + WHERE slug = $1 AND "deletedAt" IS NULL + ` + + const result = await this.pool.query(query, [slug]) + return result.rows.length > 0 ? result.rows[0] : null + } catch (error) { + console.error('Error finding insights project by slug:', error) + throw new Error('Could not find insights project') + } + } + +} diff --git a/frontend/server/utils/db.ts b/frontend/server/utils/db.ts index df67c46eb..e129c04e3 100644 --- a/frontend/server/utils/db.ts +++ b/frontend/server/utils/db.ts @@ -3,12 +3,13 @@ import { Pool } from 'pg'; import { isLocal } from './common'; -let pool: Pool | null = null; +let insightsDbPool: Pool | null = null; +let cmDbPool: Pool | null = null; -export function getDbPool(): Pool { +export function getInsightsDbPool(): Pool { const config = useRuntimeConfig() - if (!pool) { - pool = new Pool({ + if (!insightsDbPool) { + insightsDbPool = new Pool({ host: config.insightsDbWriteHost, port: config.insightsDbPort, database: config.insightsDbDatabase, @@ -20,5 +21,24 @@ export function getDbPool(): Pool { ssl: isLocal ? false : { rejectUnauthorized: false }, }); } - return pool; + return insightsDbPool; +} + + +export function getCMDbPool(): Pool { + const config = useRuntimeConfig() + if (!cmDbPool) { + cmDbPool = new Pool({ + host: config.cmDbWriteHost, + port: config.cmDbPort, + database: config.cmDbDatabase, + user: config.cmDbUsername, + password: config.cmDbPassword, + max: 20, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, + ssl: isLocal ? 
false : { rejectUnauthorized: false }, + }); + } + return cmDbPool; } \ No newline at end of file From 80a9198136ae216a8f327861f1e57fd0c4622511 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:43:09 +0200 Subject: [PATCH 04/21] chore: migrate script can point to host network now using a param for running remote migs Signed-off-by: anilb --- database/migrate.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/database/migrate.sh b/database/migrate.sh index 1b577fb58..7f1da72ac 100755 --- a/database/migrate.sh +++ b/database/migrate.sh @@ -2,7 +2,14 @@ set -ex set +o history -# Grab all command line arguments to pass them into Docker, or default to "migrate". +# Check if first argument is --host-network +DOCKER_NETWORK="" +if [ "$1" = "--host-network" ]; then + DOCKER_NETWORK="--network host" + shift # Remove --host-network from arguments +fi + +# Grab remaining command line arguments to pass them into Docker, or default to "migrate". if [ $# -eq 0 ]; then FLYWAY_COMMAND=("migrate") else @@ -11,7 +18,7 @@ fi echo "Running Flyway command: ${FLYWAY_COMMAND[@]} on jdbc:postgresql://${PGHOST}:${PGPORT}/${PGDATABASE}" -docker run --rm \ +docker run --rm ${DOCKER_NETWORK} \ -v "$(pwd)/migrations:/tmp/migrations" \ flyway/flyway:latest-alpine \ -locations="filesystem:/tmp/migrations" \ From 689508985e4980a33b4ac1e684bacb22c969e6d9 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 10:43:27 +0200 Subject: [PATCH 05/21] chore: check alter migration updates old enum keys before adding the new one Signed-off-by: anilb --- .../V1756973487__alterChatResponsesRouterResponseCheck.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql index 7c6044389..f9d4d2785 100644 --- a/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql +++ b/database/migrations/V1756973487__alterChatResponsesRouterResponseCheck.sql @@ -1,6 +1,9 @@ -- Drop the existing check constraint ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; +UPDATE chat_responses SET router_response = 'create_query' +WHERE router_response = 'text-to-sql'; + -- Add the new check constraint with 'create_query' instead of 'text-to-sql' ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check CHECK (router_response IN ('pipes', 'create_query', 'stop')); \ No newline at end of file From f4d1cdbdfd85e7ee0e01aa943e974b5d06a9e09b Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 11:44:41 +0200 Subject: [PATCH 06/21] chore: pass pg pool properly Signed-off-by: anilb --- frontend/server/api/chat/chart.ts | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 9044aa7fe..22fa0ec0e 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -1,28 +1,26 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT -import pg from 'pg' +import { Pool } from 'pg' import { generateChartConfig, modifyChartConfig } from '../../../lib/chat/chart/generator' import { ChatRepository } from '../../repo/chat.repo' import { Result, Config, DataMapping } from '../../../lib/chat/chart/types' import { PipeInstructions } from '~~/lib/chat/types' - export const maxDuration = 30 // Helper function to get router reasoning from conversation -async function getRouterReasoningFromConversation(conversationId?: string): Promise { +async function getRouterReasoningFromConversation( + pool: Pool, + conversationId?: string, +): Promise { if (!conversationId) return undefined - + try { - const pool = new pg.Pool({ - connectionString: process.env.DATABASE_URL, - }) - const chatRepo = new ChatRepository(pool) const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) - + await pool.end() - + return latestResponse?.routerReason || undefined } catch (error) { console.error('Error fetching router reasoning from conversation:', error) @@ -53,7 +51,10 @@ export default defineEventHandler(async (event): Promise(event) // Get router reasoning from conversation - const routerReasoning = await getRouterReasoningFromConversation(conversationId) + const routerReasoning = await getRouterReasoningFromConversation( + event.context.insightsDbPool as Pool, + conversationId, + ) // If pipe instructions are provided, execute them first to get results if (pipeInstructions && !results) { @@ -69,7 +70,11 @@ export default defineEventHandler(async (event): Promise Date: Thu, 11 Sep 2025 11:59:51 +0200 Subject: [PATCH 07/21] chore: remove premature pool.end call Signed-off-by: anilb --- frontend/server/api/chat/chart.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/server/api/chat/chart.ts b/frontend/server/api/chat/chart.ts index 22fa0ec0e..5103e1aae 100644 --- a/frontend/server/api/chat/chart.ts +++ b/frontend/server/api/chat/chart.ts @@ -19,8 +19,6 @@ async function getRouterReasoningFromConversation( const chatRepo = new ChatRepository(pool) const latestResponse = await chatRepo.getLatestChatResponseByConversation(conversationId) - await pool.end() - return latestResponse?.routerReason || undefined } catch (error) { console.error('Error fetching router reasoning from conversation:', error) From ee7b2460819d8e8f27a2b3ce9c9ed22b6486e99a Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 11 Sep 2025 18:07:19 +0200 Subject: [PATCH 08/21] feat: optional pipe source for agents, better overall types Signed-off-by: anilb --- frontend/lib/chat/chart/analysis.ts | 2 - frontend/lib/chat/chart/base-config.ts | 1 - frontend/lib/chat/chart/generator.ts | 6 --- frontend/lib/chat/data-copilot.ts | 49 +++++++++++++----------- frontend/lib/chat/prompts/pipe.ts | 14 +++++-- frontend/lib/chat/prompts/router.ts | 16 +++++--- frontend/lib/chat/prompts/text-to-sql.ts | 10 ++++- frontend/lib/chat/types.ts | 19 ++++++++- 8 files changed, 74 insertions(+), 43 deletions(-) diff --git a/frontend/lib/chat/chart/analysis.ts b/frontend/lib/chat/chart/analysis.ts index 6df2ebe5b..98052b81a 100644 --- a/frontend/lib/chat/chart/analysis.ts +++ b/frontend/lib/chat/chart/analysis.ts @@ -2,8 +2,6 @@ // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-unused-vars */ -// Copyright (c) 2025 The Linux Foundation and each contributor. 
-// SPDX-License-Identifier: MIT import { DateTime } from 'luxon' import type { Result } from './types' interface RecommendedVisualization { diff --git a/frontend/lib/chat/chart/base-config.ts b/frontend/lib/chat/chart/base-config.ts index 90bad7721..85b4fedaa 100644 --- a/frontend/lib/chat/chart/base-config.ts +++ b/frontend/lib/chat/chart/base-config.ts @@ -1,6 +1,5 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT - import type { Config } from './types' import { lfxColors } from '~/config/styles/colors' diff --git a/frontend/lib/chat/chart/generator.ts b/frontend/lib/chat/chart/generator.ts index 1117af639..48bd63ae4 100644 --- a/frontend/lib/chat/chart/generator.ts +++ b/frontend/lib/chat/chart/generator.ts @@ -4,8 +4,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable max-len */ /* eslint-disable vue/max-len */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { generateObject } from 'ai' import { outputSchema } from './types' @@ -46,10 +44,6 @@ const chartColors = { lines: [lfxColors.positive[500], lfxColors.negative[500], lfxColors.brand[300]], }, } - -// Legacy defaultColors for backward compatibility (if needed elsewhere) -// const defaultColors = chartColors.single; - const model = bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0') export async function generateChartConfig( diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 03033230b..7fc36364a 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,11 +1,11 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT -/* eslint-disable @typescript-eslint/no-explicit-any */ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, createDataStreamResponse, type LanguageModelV1, + type DataStreamWriter, } from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' import type { Pool } from 'pg' @@ -15,11 +15,13 @@ import { ChatRepository } from '../../server/repo/chat.repo' import { TextToSqlAgent, PipeAgent, RouterAgent } from './agents' import { executePipeInstructions, executeTextToSqlInstructions } from './instructions' import type { + AgentResponseCompleteParams, DataCopilotQueryInput, PipeAgentInput, PipeAgentStreamInput, PipeInstructions, RouterAgentInput, + RouterOutput, TextToSqlAgentInput, TextToSqlAgentStreamInput, } from './types' @@ -31,12 +33,26 @@ const bedrock = createAmazonBedrock({ region: process.env.NUXT_AWS_BEDROCK_REGION, }) +type MCPClient = Awaited> + +type TbTools = Record< + string, + { + description?: string + meta?: { description?: string } + inputSchema?: unknown + parameters?: unknown + schema?: unknown + [key: string]: unknown // Allow additional properties + } +> + export class DataCopilot { /** MCP client for communicating with Tinybird services */ - private mcpClient: any + private mcpClient!: MCPClient /** Available Tinybird tools loaded from MCP server */ - private tbTools: Record = {} + private tbTools: TbTools = {} /** Human-readable overview of tools for router agent decision making */ private toolsOverview: string = '' @@ -85,7 +101,7 @@ export class DataCopilot { this.toolsOverview = Object.entries(this.tbTools) .filter(([name]) => !excludedFromOverview.has(name)) - .map(([name, def]: [string, any]) => { + .map(([name, def]: [string, TbTools[string]]) => { try { const description = def?.description || def?.meta?.description || '' const inputSchema = def?.inputSchema || def?.parameters || def?.schema || undefined @@ -170,7 +186,7 @@ export class DataCopilot { segmentId, reformulatedQuestion, }: TextToSqlAgentInput) { - const followUpTools: Record = {} + const followUpTools: Record = {} followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] followUpTools['list_datasources'] = this.tbTools['list_datasources'] @@ -213,7 +229,7 @@ export class DataCopilot { reformulatedQuestion, toolNames, }: Omit) { - const followUpTools: Record = {} + const followUpTools: Record = {} for (const toolName of toolNames) { if (this.tbTools[toolName]) { followUpTools[toolName] = this.tbTools[toolName] @@ -365,16 +381,13 @@ export class DataCopilot { */ private async handleStopAction( userPrompt: string, - routerOutput: any, - responseData: any, - dataStream: any, + routerOutput: RouterOutput, + responseData: ChatResponse, + dataStream: DataStreamWriter, insightsDbPool: Pool, userEmail: string, conversationId?: string, ): Promise { - responseData.reasoning = `Router Decision: ${routerOutput.next_action}\nReasoning: ${routerOutput.reasoning}` - responseData.answer = routerOutput.reasoning - dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, @@ -497,17 +510,7 @@ export class DataCopilot { insightsDbPool, userEmail, dataStream, - }: { - userPrompt: string - responseData: ChatResponse - routerOutput: any - pipeInstructions?: PipeInstructions - sqlQuery?: string - conversationId?: string - insightsDbPool: Pool - userEmail: string - dataStream: any - }): Promise { + }: AgentResponseCompleteParams): Promise { 
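    // Persist the completed response, then stream the chat response ID together with the
    // conversationId so the client can reuse the same conversation for follow-up messages.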
const chatResponseId = await this.saveChatResponse( { userPrompt, diff --git a/frontend/lib/chat/prompts/pipe.ts b/frontend/lib/chat/prompts/pipe.ts index eebcb49dd..906ac99c8 100644 --- a/frontend/lib/chat/prompts/pipe.ts +++ b/frontend/lib/chat/prompts/pipe.ts @@ -8,12 +8,19 @@ export const pipePrompt = ( segmentId: string | null, reformulatedQuestion: string, tools: string[], -) => ` +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + const usePipeInstruction = pipe ? `- Use ${pipe} with different parameters if needed` : '' + + return ` You are a pipe tool specialist that creates an execution plan to answer: "${reformulatedQuestion}" # DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # AVAILABLE TOOLS @@ -67,7 +74,7 @@ Your response must include an "instructions" field with this structure: - Execute the pipes and examine what columns are returned, and which columns are needed to answer the question - Map the columns from pipe results to the final output structure using "type": "direct" - Add formula columns when calculations are needed (e.g., growth rates, percentages, differences) -- Use ${pipe} with different parameters if needed +${usePipeInstruction} - Use other available tools if they're more appropriate - Call multiple tools if needed to answer the question - Combine columns from multiple pipes if needed for comprehensive answers @@ -130,3 +137,4 @@ Always ensure variables in formulas match the dependency variable names. - Answer the reformulated question directly - Use the tools specified by the router - Be concise and accurate in your response` +} diff --git a/frontend/lib/chat/prompts/router.ts b/frontend/lib/chat/prompts/router.ts index 9116f7ddb..1b44b310a 100644 --- a/frontend/lib/chat/prompts/router.ts +++ b/frontend/lib/chat/prompts/router.ts @@ -2,8 +2,6 @@ // SPDX-License-Identifier: MIT /* eslint-disable max-len */ /* eslint-disable vue/max-len */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT export const routerPrompt = ( date: string, projectName: string, @@ -11,11 +9,18 @@ export const routerPrompt = ( parametersString: string, segmentId: string | null, toolsOverview: string, -) => `You are a routing agent that analyzes user questions and determines the appropriate next action. Your job is to evaluate questions and decide whether they can be answered with existing tools, need custom queries, or cannot be answered. +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + const pipeToolQuestion = pipe ? `- Can ${pipe} tool answer this with different parameters?` : '' + + return `You are a routing agent that analyzes user questions and determines the appropriate next action. Your job is to evaluate questions and decide whether they can be answered with existing tools, need custom queries, or cannot be answered. 
# DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # YOUR ROLE @@ -37,7 +42,7 @@ ${toolsOverview} **Step 1: Check Existing Tools (HIGHEST PRIORITY)** - **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → ALWAYS use activities_count or activities_cumulative_count pipes** -- Can ${pipe} tool answer this with different parameters? +${pipeToolQuestion} - IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. - For example, adding a country code parameter to a tool that doesn't support it is invalid. - Can other available tools answer this question? @@ -87,3 +92,4 @@ It must be something user-friendly. - If the action is "create_query", the reasoning must be something like "I'll create a query to answer the question." - If the action is "pipes", the reasoning must be something like "I'll use the widgets and to answer the question." ` +} diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index c48367dac..9cff0a130 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -7,14 +7,19 @@ export const textToSqlPrompt = ( parametersString: string, segmentId: string | null, reformulatedQuestion: string, -) => ` +) => { + const dashboardDescription = pipe + ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` + : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` + + return ` You are an expert SQL query generator that creates execution plans to answer: "${reformulatedQuestion}" Think step-by-step through the structured approach below. Be methodical and careful to ensure accuracy. 
# DATE AND CONTEXT Today's date: ${date} -Current dashboard: Project "${projectName}" using ${pipe} tool with parameters: ${parametersString} +Current dashboard: ${dashboardDescription} Segment ID: ${segmentId || 'not specified'} # YOUR TASK @@ -211,3 +216,4 @@ IMPORTANT REMINDERS: - Return the query with appropriate LIMIT in the instructions (not the test LIMIT 5) - Build your query completely and correctly BEFORE testing - Put MAXIMUM effort into getting it right the first time` +} diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index 5f155e1f6..feafb4389 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -4,6 +4,7 @@ import type { DataStreamWriter } from 'ai' import type { Pool } from 'pg' import { z } from 'zod' +import { RouterDecisionAction } from './enums' import type { ChatResponse } from '~~/server/repo/chat.repo' // ============================================ @@ -89,7 +90,11 @@ export type Instructions = z.infer // Router agent output schema export const routerOutputSchema = z.object({ - next_action: z.enum(['stop', 'create_query', 'pipes']), + next_action: z.enum([ + RouterDecisionAction.STOP, + RouterDecisionAction.CREATE_QUERY, + RouterDecisionAction.PIPES, + ]), reasoning: z.string().describe('Maximum 2 sentences explaining the decision'), reformulated_question: z.string().describe('Enhanced query with all parameters'), tools: z.array(z.string()).describe('Tools needed for next agent'), @@ -179,3 +184,15 @@ export interface TextToSqlAgentStreamInput { reformulatedQuestion: string dataStream: any } + +export interface AgentResponseCompleteParams { + userPrompt: string + responseData: ChatResponse + routerOutput: RouterOutput + pipeInstructions?: PipeInstructions + sqlQuery?: string + conversationId?: string + insightsDbPool: Pool + userEmail: string + dataStream: DataStreamWriter +} From 5f484c10b35aa2418c94532523d241505d200c74 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:11:57 +0300 Subject: [PATCH 09/21] feat: allow text_to_sql agent to execute queries for validation Signed-off-by: anil --- frontend/lib/chat/agents/text-to-sql.ts | 41 +++++++++++++ frontend/lib/chat/data-copilot.ts | 1 + frontend/lib/chat/prompts/text-to-sql.ts | 75 ++++++++++++++---------- 3 files changed, 85 insertions(+), 32 deletions(-) diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 9cce9b4a5..72d7162f4 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -78,4 +78,45 @@ export class TextToSqlAgent extends BaseAgent { protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { return true // Enable tool call monitoring for SQL agent } + + /** + * Override to add validation for text_to_sql tool calls + */ + protected override logToolCalls(response: any): void { + // Call parent method first to get normal logging + super.logToolCalls(response) + + // Add validation for text_to_sql tool calls + if (!response.steps || response.steps.length === 0) return + + for (const step of response.steps) { + if (step.toolCalls && step.toolCalls.length > 0) { + for (const call of step.toolCalls) { + if (call.toolName === 'text_to_sql') { + const question = call.args?.question || '' + + // Check if the question looks like SQL code (basic heuristic) + if (this.looksLikeSQL(question)) { + console.error(`❌ WARNING: text_to_sql tool called with SQL code instead of natural language question:`) + console.error(`Question: ${question}`) + 
console.error('text_to_sql tool should receive natural language questions, not SQL code') + // Don't throw error, just warn - allow the process to continue + } + } + } + } + } + } + + /** + * Basic heuristic to detect if a string looks like SQL code + */ + private looksLikeSQL(text: string): boolean { + const sqlKeywords = ['SELECT', 'FROM', 'WHERE', 'JOIN', 'GROUP BY', 'ORDER BY', 'LIMIT', 'WITH'] + const upperText = text.toUpperCase() + + // If it contains multiple SQL keywords, it's likely SQL code + const keywordCount = sqlKeywords.filter(keyword => upperText.includes(keyword)).length + return keywordCount >= 2 + } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 7fc36364a..523c6df58 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -189,6 +189,7 @@ export class DataCopilot { const followUpTools: Record = {} followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] followUpTools['list_datasources'] = this.tbTools['list_datasources'] + followUpTools['execute_query'] = this.tbTools['execute_query'] const agent = new TextToSqlAgent() return agent.execute({ diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index 9cff0a130..b9a49bd44 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -39,14 +39,18 @@ Your response must include an "instructions" field with a query string: - Study the schema carefully, noting column names and types - Identify which tables contain the data you need +**text_to_sql Tool:** +- Use with the natural language question to generate the SQL query +- Pass the user's question as the 'question' parameter +- Do NOT pass SQL code to this tool - it expects natural language questions only +- Build your understanding from schema first, then use text_to_sql +- You may need to refine your question and call this tool again based on validation results + **execute_query Tool:** -- This is for VALIDATION, not experimentation -- Build your query carefully and completely BEFORE testing +- Use for VALIDATION after generating SQL with text_to_sql - Add LIMIT 5 when validating to check the query works -- Use the tool to validate your query works -- If it fails or the results don't answer the question, fix the specific error and test again -- The returned SQL should be the EXACT tested query (with appropriate LIMIT, not the test LIMIT 5) -- Put extra effort to get it right the first time to avoid iterations +- If it fails, refine your natural language question and try text_to_sql again +- The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) Remember: Think through the ENTIRE query before testing. Minimize iterations. 
@@ -61,24 +65,22 @@ Follow this step-by-step process: - Identify which tables are relevant based on the query - Understand the available tables, columns, and relationships -**STEP 2: BUILD THE COMPLETE QUERY** -- Design a query that fulfills the user's request -- Apply ALL query enhancement rules -- Use Tinybird's ClickHouse SQL syntax -- ALWAYS filter by segmentId on activityRelations_deduplicated_cleaned_ds when applicable -- ALWAYS include timestamp filters when querying time-based data -- Ensure you're using the EXACT table and column names from the schemas -- Double-check all table/column names match the schema exactly -- Make sure the query is COMPLETE and CORRECT before proceeding +**STEP 2: GENERATE AND VALIDATE SQL QUERY** +- Use the text_to_sql tool with the reformulated question +- Pass the natural language question to the 'question' parameter +- Apply ALL query enhancement rules in your question formulation +- Ensure the question mentions segmentId filtering when applicable +- Ensure the question mentions timestamp filtering for time-based queries **STEP 3: VALIDATION** -- Use execute_query with your complete query (add LIMIT 5 for testing) -- If it succeeds: Return the query in the instructions with the appropriate LIMIT (not LIMIT 5) -- If it fails: Fix the specific error and test again -- Put maximum effort into getting it right the first time +- Use execute_query with the generated SQL (add LIMIT 5 for testing) +- If it succeeds: Proceed to return instructions with appropriate LIMIT +- If it fails: Refine your natural language question and try text_to_sql again +- Put maximum effort into getting it right with minimal iterations **STEP 4: RETURN INSTRUCTIONS** -- Create the instructions with your validated SQL query +- Create the instructions with the validated SQL query +- Use appropriate LIMIT for final query (not the test LIMIT 5) - Do not return the data, only the query plan - Provide a brief explanation of your query logic @@ -176,6 +178,12 @@ length(), empty(), notEmpty(), arrayElement(), has(), hasAll(), hasAny(), indexOf(), arrayCount(), arraySum(), arrayAvg(), arrayMin(), arrayMax(), arrayUniq(), arrayJoin(), arrayConcat(), arraySlice(), arraySort(), arrayReverse() +**Window Functions:** +row_number(), rank(), dense_rank(), percent_rank(), +lagInFrame(x[, offset[, default]]), leadInFrame(x[, offset[, default]]), +first_value(x), last_value(x), nth_value(x, offset) +Note: Use lagInFrame() instead of LAG(), leadInFrame() instead of LEAD() + **JSON Functions (if table has JSON columns):** JSONExtract(), JSONExtractString(), JSONExtractInt(), JSONExtractFloat(), JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() @@ -190,10 +198,11 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() # CRITICAL REMINDERS 1. **Tool Usage Discipline:** - - list_datasources: Use ONCE at the beginning - - execute_query: Use for validation, minimize iterations - - Think through the ENTIRE query before testing - - Put maximum effort into getting it right the first time + - list_datasources: Use ONCE at the beginning to understand schema + - text_to_sql: Use with natural language questions (may need refinement) + - execute_query: Use for validation with LIMIT 5 + - Do NOT call text_to_sql with SQL code - only natural language questions + - Put maximum effort into formulating questions correctly to minimize iterations 2. 
**Always Apply Filters:** - segmentId filter on activityRelations_deduplicated_cleaned_ds when applicable @@ -201,9 +210,10 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Use provided parameters as defaults 3. **Efficiency:** - - Build complete, correct queries before testing - - Minimize iterations - get it right the first time - - Use existing tools when possible + - Understand schema first, then generate and validate SQL + - Minimize iterations by crafting good natural language questions + - Use validation to ensure query works before returning instructions + - Use the text_to_sql tool properly with natural language questions only **RESPONSE GUIDELINES** - Create a clear SQL query in the instructions @@ -211,9 +221,10 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Provide a brief explanation of your query selection and how it answers the question IMPORTANT REMINDERS: -- Use list_datasources ONCE at the beginning -- Use execute_query for validation with LIMIT 5 (iterate if needed, but minimize iterations) -- Return the query with appropriate LIMIT in the instructions (not the test LIMIT 5) -- Build your query completely and correctly BEFORE testing -- Put MAXIMUM effort into getting it right the first time` +- Use list_datasources ONCE at the beginning to understand available tables +- Use text_to_sql with natural language questions (NOT SQL code) +- Use execute_query with LIMIT 5 to validate the generated SQL works +- Return the validated query in the instructions field (with appropriate LIMIT) +- Formulate questions to text_to_sql correctly to minimize validation iterations +- Put MAXIMUM effort into getting the question right with minimal iterations` } From f030dfb7213f4203d7f4f412f25c9f65f9204045 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:22:35 +0300 Subject: [PATCH 10/21] fix: less agressive sql detection Signed-off-by: anil --- frontend/lib/chat/agents/text-to-sql.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 72d7162f4..0b1b4288c 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -112,11 +112,16 @@ export class TextToSqlAgent extends BaseAgent { * Basic heuristic to detect if a string looks like SQL code */ private looksLikeSQL(text: string): boolean { - const sqlKeywords = ['SELECT', 'FROM', 'WHERE', 'JOIN', 'GROUP BY', 'ORDER BY', 'LIMIT', 'WITH'] - const upperText = text.toUpperCase() + // More specific SQL patterns that indicate actual SQL code, not natural language + const sqlPatterns = [ + /^\s*SELECT\s+/i, // Starts with SELECT + /\bFROM\s+\w+\s*$/i, // Ends with FROM table + /\bSELECT\s+.*\s+FROM\s+/i, // Contains SELECT ... 
FROM pattern + /\bWITH\s+\w+\s+AS\s*\(/i, // CTE pattern WITH name AS ( + /\bUNION\s+(ALL\s+)?SELECT/i, // UNION SELECT pattern + ] - // If it contains multiple SQL keywords, it's likely SQL code - const keywordCount = sqlKeywords.filter(keyword => upperText.includes(keyword)).length - return keywordCount >= 2 + // Check for actual SQL structure patterns + return sqlPatterns.some(pattern => pattern.test(text)) } } From 476fab9c071494dbf514cb1e924de43a8bcc4bb1 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:36:51 +0300 Subject: [PATCH 11/21] chore: text-to-sql logging Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 24 ++++++++++++++++++++++++ frontend/lib/chat/instructions.ts | 22 ++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 523c6df58..12b666e78 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -431,6 +431,12 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { + console.warn('🚀 handleCreateQueryAction called with:', { + reformulatedQuestion: reformulatedQuestion?.substring(0, 100) + '...', + projectName, + pipe, + }) + const textToSqlOutput = await this.runTextToSqlAgent({ messages, date, @@ -441,8 +447,24 @@ export class DataCopilot { reformulatedQuestion, }) + console.warn('🤖 TextToSqlAgent output:', { + hasExplanation: !!textToSqlOutput.explanation, + explanationLength: textToSqlOutput.explanation?.length || 0, + hasInstructions: !!textToSqlOutput.instructions, + instructionsType: typeof textToSqlOutput.instructions, + instructionsLength: textToSqlOutput.instructions?.length || 0, + instructionsPreview: textToSqlOutput.instructions?.substring(0, 150) + '...', + }) + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + console.warn('📊 Query execution result:', { + queryDataType: typeof queryData, + queryDataLength: queryData?.length || 0, + isArray: Array.isArray(queryData), + firstItemKeys: queryData?.[0] ? Object.keys(queryData[0]) : null, + }) + dataStream.writeData({ type: StreamDataType.SQL_RESULT, explanation: textToSqlOutput.explanation, @@ -450,6 +472,8 @@ export class DataCopilot { data: queryData, }) + console.warn('📤 Streamed data to client with type:', StreamDataType.SQL_RESULT) + return { sqlQuery: textToSqlOutput.instructions } } diff --git a/frontend/lib/chat/instructions.ts b/frontend/lib/chat/instructions.ts index 96e7e205b..ee6929444 100644 --- a/frontend/lib/chat/instructions.ts +++ b/frontend/lib/chat/instructions.ts @@ -208,6 +208,12 @@ export async function executePipeInstructions(instructions: PipeInstructions): P // Function to execute a SQL query via TinyBird's Query API export async function executeTextToSqlInstructions(query: TextToSqlInstructions): Promise { + console.warn('🔍 executeTextToSqlInstructions called with query:', { + queryType: typeof query, + queryLength: query?.length || 0, + queryPreview: query?.substring(0, 100) + (query?.length > 100 ? '...' 
: ''), + }) + const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN @@ -219,7 +225,13 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) // Execute the SQL query via TinyBird's Query API // TinyBird expects the query as URL-encoded form data const params = new URLSearchParams() - params.append('q', `${query} FORMAT JSON`) + const finalQuery = `${query} FORMAT JSON` + params.append('q', finalQuery) + + console.warn('📤 Executing TinyBird query:', { + url: `${tinybirdBaseUrl}/v0/sql`, + query: finalQuery.substring(0, 200) + (finalQuery.length > 200 ? '...' : ''), + }) const response = await ofetch(`${tinybirdBaseUrl}/v0/sql`, { method: 'POST', @@ -230,10 +242,16 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) body: params.toString(), }) + console.warn('📥 TinyBird response:', { + hasData: !!response.data, + dataLength: response.data?.length || 0, + responseKeys: Object.keys(response || {}), + }) + // TinyBird SQL API response format has data array return response.data || [] } catch (error: any) { - console.error('Error executing SQL query:', error) + console.error('❌ Error executing SQL query:', error) // Log more details about the error if (error.data) { console.error('Error response data:', error.data) From 34013b281d471c3ab367a0ed2a86dea06bbecd55 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 18:58:42 +0300 Subject: [PATCH 12/21] chore: blocking streaming logging Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 12b666e78..5a20f99d5 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -268,6 +268,13 @@ export class DataCopilot { const date = new Date().toISOString().slice(0, 10) return createDataStreamResponse({ + headers: { + 'Cache-Control': 'no-cache, no-store, must-revalidate', + 'Pragma': 'no-cache', + 'Expires': '0', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', // Disable Nginx buffering if present + }, execute: async (dataStream) => { const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', @@ -282,10 +289,12 @@ export class DataCopilot { } try { + console.warn('📤 Writing ANALYZING status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) + console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ messages, @@ -315,12 +324,14 @@ export class DataCopilot { return } + console.warn('📤 Writing COMPLETE status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, reasoning: routerOutput.reasoning, reformulatedQuestion: routerOutput.reformulated_question, }) + console.warn('✅ COMPLETE status written to stream') let sqlQuery: string | undefined = undefined let pipeInstructions: PipeInstructions | undefined = undefined @@ -366,11 +377,14 @@ export class DataCopilot { dataStream, }) } catch (error) { + console.error('❌ Error in streamingAgentRequestHandler:', error) + console.warn('📤 Writing ERROR status to stream at:', new Date().toISOString()) dataStream.writeData({ type: 'router-status', status: 'error', error: error instanceof Error ? 
error.message : 'An error occurred', }) + console.warn('✅ ERROR status written to stream') throw error } }, From c42efea70b2bd845b060690e942d7108128b88a1 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 19:11:02 +0300 Subject: [PATCH 13/21] chore: headers for disable buffering Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 7 ------- frontend/server/api/chat/stream.ts | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 5a20f99d5..697c32f08 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -268,13 +268,6 @@ export class DataCopilot { const date = new Date().toISOString().slice(0, 10) return createDataStreamResponse({ - headers: { - 'Cache-Control': 'no-cache, no-store, must-revalidate', - 'Pragma': 'no-cache', - 'Expires': '0', - 'Connection': 'keep-alive', - 'X-Accel-Buffering': 'no', // Disable Nginx buffering if present - }, execute: async (dataStream) => { const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 337e05988..490005a0e 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -17,6 +17,13 @@ interface IStreamRequestBody { } export default defineEventHandler(async (event): Promise => { + // Set streaming headers immediately + setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate') + setHeader(event, 'Pragma', 'no-cache') + setHeader(event, 'Expires', '0') + setHeader(event, 'Connection', 'keep-alive') + setHeader(event, 'X-Accel-Buffering', 'no') + try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) From 56cbed740a5ddf5f2d6ddff376bdf2d6f4f814c9 Mon Sep 17 00:00:00 2001 From: anil Date: Fri, 12 Sep 2025 19:23:20 +0300 Subject: [PATCH 14/21] chore: moved createDataStreamResponse out of DataCopilot to test streaming Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 13 ++++--------- frontend/lib/chat/types.ts | 1 + frontend/server/api/chat/stream.ts | 24 +++++++++++++++--------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index 697c32f08..f22932b6f 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -3,9 +3,7 @@ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, - createDataStreamResponse, type LanguageModelV1, - type DataStreamWriter, } from 'ai' import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' import type { Pool } from 'pg' @@ -263,13 +261,12 @@ export class DataCopilot { conversationId, insightsDbPool, userEmail, - }: DataCopilotQueryInput): Promise { + dataStream, + }: DataCopilotQueryInput): Promise { const parametersString = JSON.stringify(parameters || {}) const date = new Date().toISOString().slice(0, 10) - return createDataStreamResponse({ - execute: async (dataStream) => { - const responseData: ChatResponse = { + const responseData: ChatResponse = { userPrompt: messages[messages.length - 1]?.content || '', inputTokens: 0, outputTokens: 0, @@ -380,8 +377,6 @@ export class DataCopilot { console.warn('✅ ERROR status written to stream') throw error } - }, - }) } /** @@ -391,7 +386,7 @@ export class DataCopilot { userPrompt: string, 
routerOutput: RouterOutput, responseData: ChatResponse, - dataStream: DataStreamWriter, + dataStream: any, insightsDbPool: Pool, userEmail: string, conversationId?: string, diff --git a/frontend/lib/chat/types.ts b/frontend/lib/chat/types.ts index feafb4389..24f41cea5 100644 --- a/frontend/lib/chat/types.ts +++ b/frontend/lib/chat/types.ts @@ -162,6 +162,7 @@ export interface DataCopilotQueryInput { conversationId?: string insightsDbPool: Pool userEmail: string + dataStream: DataStreamWriter // DataStreamWriter from AI SDK } export interface TextToSqlAgentInput { diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 490005a0e..10c419f19 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -1,6 +1,7 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT import type { Pool } from 'pg' +import { createDataStreamResponse } from 'ai' import { DataCopilot } from '~~/lib/chat/data-copilot' import { InsightsProjectsRepository } from '~~/server/repo/insightsProjects.repo' import { ChatMessage } from '~~/lib/chat/types' @@ -54,15 +55,20 @@ export default defineEventHandler(async (event): Promise => { const dataCopilot = new DataCopilot() await dataCopilot.initialize() - return dataCopilot.streamingAgentRequestHandler({ - messages, - segmentId: insightsProjects.segmentId, - projectName, - pipe, - parameters, - conversationId: finalConversationId, - insightsDbPool, - userEmail: event.context.user.email, + return createDataStreamResponse({ + execute: async (dataStream) => { + await dataCopilot.streamingAgentRequestHandler({ + messages, + segmentId: insightsProjects.segmentId, + projectName, + pipe, + parameters, + conversationId: finalConversationId, + insightsDbPool, + userEmail: event.context.user.email, + dataStream, // Pass the dataStream to the class + }) + } }) } catch (error) { return createError({ From 5207800de677f54ef48dabe0a3752a640d6cc10f Mon Sep 17 00:00:00 2001 From: anil Date: Sat, 13 Sep 2025 20:49:21 +0300 Subject: [PATCH 15/21] chore: test headers for cf streaming issues Signed-off-by: anil --- frontend/lib/chat/data-copilot.ts | 6 ++++++ frontend/server/api/chat/stream.ts | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index f22932b6f..d5ab9edda 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -1,5 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. 
// SPDX-License-Identifier: MIT +/* eslint-disable @typescript-eslint/no-explicit-any */ import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock' import { experimental_createMCPClient as createMCPClient, @@ -284,6 +285,11 @@ export class DataCopilot { type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) + // Add padding for Cloudflare streaming threshold + dataStream.writeData({ + type: 'padding', + data: ' '.repeat(2048), + }) console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 10c419f19..22c812054 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -18,12 +18,13 @@ interface IStreamRequestBody { } export default defineEventHandler(async (event): Promise => { - // Set streaming headers immediately - setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate') + // Set streaming headers for Cloudflare compatibility + setHeader(event, 'Cache-Control', 'no-cache, no-store, must-revalidate, no-transform') setHeader(event, 'Pragma', 'no-cache') setHeader(event, 'Expires', '0') - setHeader(event, 'Connection', 'keep-alive') setHeader(event, 'X-Accel-Buffering', 'no') + setHeader(event, 'Content-Type', 'text/plain; charset=utf-8') + setHeader(event, 'Connection', 'close') try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = From 9196bc62fe8bbc78b493977240bade36a8e4ead7 Mon Sep 17 00:00:00 2001 From: anil Date: Sat, 13 Sep 2025 21:16:08 +0300 Subject: [PATCH 16/21] fix: enforcing text-to-sql response type, code cleaning Signed-off-by: anil --- frontend/lib/chat/agents/base-agent.ts | 18 +++++++++--- frontend/lib/chat/data-copilot.ts | 35 ------------------------ frontend/lib/chat/instructions.ts | 17 ------------ frontend/lib/chat/prompts/text-to-sql.ts | 17 ++++++++---- 4 files changed, 25 insertions(+), 62 deletions(-) diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 0e1be535c..2927f485f 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -157,33 +157,43 @@ export abstract class BaseAgent { * Extract and validate JSON from the response text */ protected getJson(text: string): TOutput { + // Debug logging to see what the agent actually returned + console.warn(`🔍 ${this.name} agent raw response:`, text.substring(0, 500) + (text.length > 500 ? '...' 
: '')) + // First, try simple JSON.parse since the text usually contains valid JSON let parsedOutput try { parsedOutput = JSON.parse(text) + console.warn(`✅ ${this.name} agent JSON.parse succeeded`) } catch { // Fall back to extractJSON if direct parsing fails try { parsedOutput = extractJSON(text) + console.warn(`✅ ${this.name} agent extractJSON succeeded`) } catch (error) { - console.error(`${this.name} agent failed to parse JSON:`, error) - console.error(`Response text:`, text) + console.error(`❌ ${this.name} agent failed to parse JSON:`, error) + console.error(`❌ Response text:`, text) throw new Error(`${this.name} agent did not return valid JSON`) } } if (!parsedOutput) { - console.error('No JSON found in the response') + console.error('❌ No JSON found in the response') console.error(text) throw new Error(`${this.name} agent did not return valid JSON`) } + // Debug logging for parsed output + console.warn(`🔍 ${this.name} agent parsed output:`, JSON.stringify(parsedOutput, null, 2)) + // Validate against schema try { const validatedOutput = this.outputSchema.parse(parsedOutput) + console.warn(`✅ ${this.name} agent schema validation succeeded`) return validatedOutput } catch (error) { - console.error(`Failed to validate ${this.name} JSON`, error) + console.error(`❌ Failed to validate ${this.name} JSON`, error) + console.error(`❌ Parsed output was:`, JSON.stringify(parsedOutput, null, 2)) throw new Error(`Failed to validate ${this.name} JSON: ${error}`) } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index d5ab9edda..ab380e3f9 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -280,17 +280,11 @@ export class DataCopilot { } try { - console.warn('📤 Writing ANALYZING status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.ANALYZING, }) // Add padding for Cloudflare streaming threshold - dataStream.writeData({ - type: 'padding', - data: ' '.repeat(2048), - }) - console.warn('✅ ANALYZING status written to stream') const routerOutput = await this.runRouterAgent({ messages, @@ -320,14 +314,12 @@ export class DataCopilot { return } - console.warn('📤 Writing COMPLETE status to stream at:', new Date().toISOString()) dataStream.writeData({ type: StreamDataType.ROUTER_STATUS, status: StreamDataStatus.COMPLETE, reasoning: routerOutput.reasoning, reformulatedQuestion: routerOutput.reformulated_question, }) - console.warn('✅ COMPLETE status written to stream') let sqlQuery: string | undefined = undefined let pipeInstructions: PipeInstructions | undefined = undefined @@ -373,14 +365,11 @@ export class DataCopilot { dataStream, }) } catch (error) { - console.error('❌ Error in streamingAgentRequestHandler:', error) - console.warn('📤 Writing ERROR status to stream at:', new Date().toISOString()) dataStream.writeData({ type: 'router-status', status: 'error', error: error instanceof Error ? 
error.message : 'An error occurred', }) - console.warn('✅ ERROR status written to stream') throw error } } @@ -439,12 +428,6 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { - console.warn('🚀 handleCreateQueryAction called with:', { - reformulatedQuestion: reformulatedQuestion?.substring(0, 100) + '...', - projectName, - pipe, - }) - const textToSqlOutput = await this.runTextToSqlAgent({ messages, date, @@ -455,24 +438,8 @@ export class DataCopilot { reformulatedQuestion, }) - console.warn('🤖 TextToSqlAgent output:', { - hasExplanation: !!textToSqlOutput.explanation, - explanationLength: textToSqlOutput.explanation?.length || 0, - hasInstructions: !!textToSqlOutput.instructions, - instructionsType: typeof textToSqlOutput.instructions, - instructionsLength: textToSqlOutput.instructions?.length || 0, - instructionsPreview: textToSqlOutput.instructions?.substring(0, 150) + '...', - }) - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - console.warn('📊 Query execution result:', { - queryDataType: typeof queryData, - queryDataLength: queryData?.length || 0, - isArray: Array.isArray(queryData), - firstItemKeys: queryData?.[0] ? Object.keys(queryData[0]) : null, - }) - dataStream.writeData({ type: StreamDataType.SQL_RESULT, explanation: textToSqlOutput.explanation, @@ -480,8 +447,6 @@ export class DataCopilot { data: queryData, }) - console.warn('📤 Streamed data to client with type:', StreamDataType.SQL_RESULT) - return { sqlQuery: textToSqlOutput.instructions } } diff --git a/frontend/lib/chat/instructions.ts b/frontend/lib/chat/instructions.ts index ee6929444..428a9a37b 100644 --- a/frontend/lib/chat/instructions.ts +++ b/frontend/lib/chat/instructions.ts @@ -208,12 +208,6 @@ export async function executePipeInstructions(instructions: PipeInstructions): P // Function to execute a SQL query via TinyBird's Query API export async function executeTextToSqlInstructions(query: TextToSqlInstructions): Promise { - console.warn('🔍 executeTextToSqlInstructions called with query:', { - queryType: typeof query, - queryLength: query?.length || 0, - queryPreview: query?.substring(0, 100) + (query?.length > 100 ? '...' : ''), - }) - const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN @@ -228,11 +222,6 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) const finalQuery = `${query} FORMAT JSON` params.append('q', finalQuery) - console.warn('📤 Executing TinyBird query:', { - url: `${tinybirdBaseUrl}/v0/sql`, - query: finalQuery.substring(0, 200) + (finalQuery.length > 200 ? '...' 
: ''), - }) - const response = await ofetch(`${tinybirdBaseUrl}/v0/sql`, { method: 'POST', headers: { @@ -242,12 +231,6 @@ export async function executeTextToSqlInstructions(query: TextToSqlInstructions) body: params.toString(), }) - console.warn('📥 TinyBird response:', { - hasData: !!response.data, - dataLength: response.data?.length || 0, - responseKeys: Object.keys(response || {}), - }) - // TinyBird SQL API response format has data array return response.data || [] } catch (error: any) { diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index b9a49bd44..86b94c909 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -47,10 +47,11 @@ Your response must include an "instructions" field with a query string: - You may need to refine your question and call this tool again based on validation results **execute_query Tool:** -- Use for VALIDATION after generating SQL with text_to_sql +- Use ONLY for VALIDATION after generating SQL with text_to_sql - Add LIMIT 5 when validating to check the query works - If it fails, refine your natural language question and try text_to_sql again - The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) +- CRITICAL: Do NOT include query results in your final JSON response - only the query string Remember: Think through the ENTIRE query before testing. Minimize iterations. @@ -77,11 +78,13 @@ Follow this step-by-step process: - If it succeeds: Proceed to return instructions with appropriate LIMIT - If it fails: Refine your natural language question and try text_to_sql again - Put maximum effort into getting it right with minimal iterations +- IMPORTANT: Validation results are NOT part of your final response - only use for verification **STEP 4: RETURN INSTRUCTIONS** - Create the instructions with the validated SQL query - Use appropriate LIMIT for final query (not the test LIMIT 5) -- Do not return the data, only the query plan +- CRITICAL: Return ONLY the SQL query string in "instructions" field - NO query results/data +- Your JSON response must contain ONLY: explanation and instructions (SQL query string) - Provide a brief explanation of your query logic # QUERY ENHANCEMENT RULES @@ -215,10 +218,12 @@ JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() - Use validation to ensure query works before returning instructions - Use the text_to_sql tool properly with natural language questions only -**RESPONSE GUIDELINES** -- Create a clear SQL query in the instructions -- Do not return the data from the tools used, only the query plan -- Provide a brief explanation of your query selection and how it answers the question +**RESPONSE GUIDELINES - CRITICAL** +- Your JSON response must contain ONLY two fields: "explanation" and "instructions" +- "instructions" field must contain ONLY the SQL query string (no results, no data) +- "explanation" field must contain a brief explanation of your query selection +- NEVER include query results, validation data, or tool outputs in your JSON response +- The query will be executed separately - you only provide the query string IMPORTANT REMINDERS: - Use list_datasources ONCE at the beginning to understand available tables From d10ea8140bf1efd4927b2d9b8690cfb891b681e7 Mon Sep 17 00:00:00 2001 From: anil Date: Mon, 15 Sep 2025 03:23:47 +0300 Subject: [PATCH 17/21] feat: improved text-to-sql, keepalives for cf Signed-off-by: anil --- frontend/lib/chat/agents/base-agent.ts | 24 ++- 
frontend/lib/chat/agents/text-to-sql.ts | 178 +++++++++++++++------ frontend/lib/chat/data-copilot.ts | 84 +++++++--- frontend/lib/chat/prompts/text-to-sql.ts | 192 +++++++---------------- frontend/nuxt.config.ts | 12 +- 5 files changed, 261 insertions(+), 229 deletions(-) diff --git a/frontend/lib/chat/agents/base-agent.ts b/frontend/lib/chat/agents/base-agent.ts index 2927f485f..1f362d573 100644 --- a/frontend/lib/chat/agents/base-agent.ts +++ b/frontend/lib/chat/agents/base-agent.ts @@ -160,20 +160,28 @@ export abstract class BaseAgent { // Debug logging to see what the agent actually returned console.warn(`🔍 ${this.name} agent raw response:`, text.substring(0, 500) + (text.length > 500 ? '...' : '')) - // First, try simple JSON.parse since the text usually contains valid JSON + // Try multiple parsing strategies for speed and reliability let parsedOutput try { + // Strategy 1: Direct JSON.parse (fastest) parsedOutput = JSON.parse(text) console.warn(`✅ ${this.name} agent JSON.parse succeeded`) } catch { - // Fall back to extractJSON if direct parsing fails try { - parsedOutput = extractJSON(text) - console.warn(`✅ ${this.name} agent extractJSON succeeded`) - } catch (error) { - console.error(`❌ ${this.name} agent failed to parse JSON:`, error) - console.error(`❌ Response text:`, text) - throw new Error(`${this.name} agent did not return valid JSON`) + // Strategy 2: Try parsing after trimming and cleaning + const cleanedText = text.trim().replace(/^```json\s*|\s*```$/g, '') + parsedOutput = JSON.parse(cleanedText) + console.warn(`✅ ${this.name} agent cleaned JSON.parse succeeded`) + } catch { + // Strategy 3: Fall back to extractJSON (slower but more robust) + try { + parsedOutput = extractJSON(text) + console.warn(`✅ ${this.name} agent extractJSON succeeded`) + } catch (error) { + console.error(`❌ ${this.name} agent failed to parse JSON:`, error) + console.error(`❌ Response text:`, text) + throw new Error(`${this.name} agent did not return valid JSON`) + } } } diff --git a/frontend/lib/chat/agents/text-to-sql.ts b/frontend/lib/chat/agents/text-to-sql.ts index 0b1b4288c..bcb6de756 100644 --- a/frontend/lib/chat/agents/text-to-sql.ts +++ b/frontend/lib/chat/agents/text-to-sql.ts @@ -1,8 +1,6 @@ // Copyright (c) 2025 The Linux Foundation and each contributor. // SPDX-License-Identifier: MIT /* eslint-disable @typescript-eslint/no-explicit-any */ -// Copyright (c) 2025 The Linux Foundation and each contributor. -// SPDX-License-Identifier: MIT import { z } from 'zod' import { textToSqlInstructionsSchema } from '../types' import { textToSqlPrompt } from '../prompts/text-to-sql' @@ -31,10 +29,134 @@ interface TextToSqlAgentInput { } export class TextToSqlAgent extends BaseAgent { + /** + * Generate SQL query using tools and text extraction + */ + override async execute( + input: TextToSqlAgentInput & { messages: any[] } + ): Promise { + try { + const { generateText } = await import('ai') + const systemPrompt = this.getSystemPrompt(input) + const tools = this.getTools(input) + const conversationHistoryReceipt = this.generateConversationHistoryReceipt(input.messages) + + // Remove broken text_to_sql tool, keep working ones + const workingTools = { ...tools } + delete workingTools['text_to_sql'] + + const fullSystemPrompt = conversationHistoryReceipt + systemPrompt + ` + +## CRITICAL INSTRUCTIONS +1. Use tools BRIEFLY to understand schema (max 2 tool calls) +2. Then STOP calling tools and write the SQL query +3. Put your SQL in a markdown code block: \`\`\`sql ... \`\`\` +4. 
You MUST conclude with a final SQL query - do not keep exploring!` + + const generateConfig: any = { + model: this.getModel(input), + system: fullSystemPrompt, + tools: workingTools, + maxSteps: this.maxSteps, + temperature: this.temperature, + } + + const providerOptions = this.getProviderOptions(input) + if (providerOptions) { + generateConfig.providerOptions = providerOptions + } + + generateConfig.messages = input.messages.filter( + (msg: any) => msg.content && msg.content.trim() !== '' && msg.role === 'user', + ).slice(-1) + + const response = await generateText(generateConfig) + + if (this.shouldMonitorToolCalls(input)) { + this.logToolCalls(response) + } + + // Extract SQL from text response + const result = this.extractSqlFromTextResponse(response) + console.warn("🔍 Extracted SQL:", result.instructions) + + return { + ...result, + usage: response.usage, + } + } catch (error) { + throw this.createError(error) + } + } + + + /** + * Extract SQL query from text response when tools fail + */ + private extractSqlFromTextResponse(response: any): SqlOutput { + const text = response.text || '' + + // Look for SQL code blocks + const sqlBlockMatch = text.match(/```sql\n([\s\S]*?)\n```/i) + if (sqlBlockMatch && sqlBlockMatch[1]) { + const sqlQuery = this.cleanSqlQuery(sqlBlockMatch[1].trim()) + return { + explanation: "Generated SQL query based on database schema analysis", + instructions: sqlQuery, + } + } + + // Look for WITH or SELECT statements in the text + const withMatch = text.match(/\b(WITH[\s\S]*?ORDER BY[^;]*;?)/i) + const selectMatch = text.match(/\b(SELECT[\s\S]*?ORDER BY[^;]*;?)/i) + + if (withMatch && withMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(withMatch[1].trim()), + } + } + + if (selectMatch && selectMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(selectMatch[1].trim()), + } + } + + // Fallback: look for any SQL-like content + const generalSqlMatch = text.match(/\b((?:WITH|SELECT)[\s\S]*?)(?=\n\n|\n(?![A-Z\s,()])|$)/i) + if (generalSqlMatch && generalSqlMatch[1]) { + return { + explanation: "Extracted SQL query from agent response", + instructions: this.cleanSqlQuery(generalSqlMatch[1].trim()), + } + } + + throw new Error('Could not extract SQL query from response text') + } + + /** + * Clean SQL query for Tinybird compatibility + */ + private cleanSqlQuery(sql: string): string { + // Remove SQL comments (both line and block comments) + sql = sql.replace(/--.*$/gm, '') // Remove line comments + sql = sql.replace(/\/\*[\s\S]*?\*\//g, '') // Remove block comments + + // Remove trailing semicolon (Tinybird doesn't allow it) + sql = sql.replace(/;\s*$/, '') + + // Clean up extra whitespace + sql = sql.replace(/\s+/g, ' ').trim() + + return sql + } + readonly name = 'SQL' readonly outputSchema = sqlOutputSchema readonly temperature = 0 - readonly maxSteps = 10 // Allow multiple steps for SQL generation and execution + readonly maxSteps = 3 protected getModel(input: TextToSqlAgentInput): any { return input.model @@ -70,58 +192,12 @@ export class TextToSqlAgent extends BaseAgent { protected override getProviderOptions(_input: TextToSqlAgentInput): any { return { bedrock: { - reasoningConfig: { type: 'enabled', budgetTokens: 3000 }, + reasoningConfig: { type: 'enabled', budgetTokens: 1500 }, // Reduced from 3000 for faster responses }, } } protected override shouldMonitorToolCalls(_input: TextToSqlAgentInput): boolean { - return true // 
Enable tool call monitoring for SQL agent - } - - /** - * Override to add validation for text_to_sql tool calls - */ - protected override logToolCalls(response: any): void { - // Call parent method first to get normal logging - super.logToolCalls(response) - - // Add validation for text_to_sql tool calls - if (!response.steps || response.steps.length === 0) return - - for (const step of response.steps) { - if (step.toolCalls && step.toolCalls.length > 0) { - for (const call of step.toolCalls) { - if (call.toolName === 'text_to_sql') { - const question = call.args?.question || '' - - // Check if the question looks like SQL code (basic heuristic) - if (this.looksLikeSQL(question)) { - console.error(`❌ WARNING: text_to_sql tool called with SQL code instead of natural language question:`) - console.error(`Question: ${question}`) - console.error('text_to_sql tool should receive natural language questions, not SQL code') - // Don't throw error, just warn - allow the process to continue - } - } - } - } - } - } - - /** - * Basic heuristic to detect if a string looks like SQL code - */ - private looksLikeSQL(text: string): boolean { - // More specific SQL patterns that indicate actual SQL code, not natural language - const sqlPatterns = [ - /^\s*SELECT\s+/i, // Starts with SELECT - /\bFROM\s+\w+\s*$/i, // Ends with FROM table - /\bSELECT\s+.*\s+FROM\s+/i, // Contains SELECT ... FROM pattern - /\bWITH\s+\w+\s+AS\s*\(/i, // CTE pattern WITH name AS ( - /\bUNION\s+(ALL\s+)?SELECT/i, // UNION SELECT pattern - ] - - // Check for actual SQL structure patterns - return sqlPatterns.some(pattern => pattern.test(text)) + return false // Enable tool call monitoring for SQL agent } } diff --git a/frontend/lib/chat/data-copilot.ts b/frontend/lib/chat/data-copilot.ts index ab380e3f9..5965ac465 100644 --- a/frontend/lib/chat/data-copilot.ts +++ b/frontend/lib/chat/data-copilot.ts @@ -185,10 +185,9 @@ export class DataCopilot { segmentId, reformulatedQuestion, }: TextToSqlAgentInput) { - const followUpTools: Record = {} - followUpTools['text_to_sql'] = this.tbTools['text_to_sql'] - followUpTools['list_datasources'] = this.tbTools['list_datasources'] - followUpTools['execute_query'] = this.tbTools['execute_query'] + const followUpTools = this.tbTools + delete followUpTools['execute_query'] + const agent = new TextToSqlAgent() return agent.execute({ @@ -250,6 +249,29 @@ export class DataCopilot { }) } + /** + * Send keepalive message to prevent Cloudflare timeout + */ + private sendKeepalive(dataStream: any, message: string): void { + dataStream.writeData({ + type: 'keepalive', + message, + timestamp: new Date().toISOString(), + }) + } + + /** + * Send progress update message + */ + private sendProgress(dataStream: any, status: string, message: string): void { + dataStream.writeData({ + type: StreamDataType.ROUTER_STATUS, + status: 'progress', + message, + timestamp: new Date().toISOString(), + }) + } + /** * Main streaming handler that orchestrates the entire AI agent workflow */ @@ -428,26 +450,42 @@ export class DataCopilot { reformulatedQuestion, dataStream, }: TextToSqlAgentStreamInput): Promise<{ sqlQuery: string }> { - const textToSqlOutput = await this.runTextToSqlAgent({ - messages, - date, - projectName, - pipe, - parametersString, - segmentId, - reformulatedQuestion, - }) - - const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) - - dataStream.writeData({ - type: StreamDataType.SQL_RESULT, - explanation: textToSqlOutput.explanation, - instructions: 
textToSqlOutput.instructions, - data: queryData, - }) + // Send progress update before starting TextToSql agent + this.sendProgress(dataStream, 'progress', 'Analyzing database schema...') + + // Set up keepalive interval during long operation + const keepaliveInterval = setInterval(() => { + this.sendKeepalive(dataStream, 'Processing SQL query generation...') + }, 15000) // Send keepalive every 15 seconds + + try { + const textToSqlOutput = await this.runTextToSqlAgent({ + messages, + date, + projectName, + pipe, + parametersString, + segmentId, + reformulatedQuestion, + }) + + clearInterval(keepaliveInterval) + this.sendProgress(dataStream, 'progress', `SQL query generated! Executing [${textToSqlOutput.instructions}]...`) + + const queryData = await executeTextToSqlInstructions(textToSqlOutput.instructions) + + dataStream.writeData({ + type: StreamDataType.SQL_RESULT, + explanation: textToSqlOutput.explanation, + instructions: textToSqlOutput.instructions, + data: queryData, + }) - return { sqlQuery: textToSqlOutput.instructions } + return { sqlQuery: textToSqlOutput.instructions } + } catch (error) { + clearInterval(keepaliveInterval) + throw error + } } /** diff --git a/frontend/lib/chat/prompts/text-to-sql.ts b/frontend/lib/chat/prompts/text-to-sql.ts index 86b94c909..058ac5d0f 100644 --- a/frontend/lib/chat/prompts/text-to-sql.ts +++ b/frontend/lib/chat/prompts/text-to-sql.ts @@ -8,107 +8,63 @@ export const textToSqlPrompt = ( segmentId: string | null, reformulatedQuestion: string, ) => { - const dashboardDescription = pipe - ? `Project "${projectName}" using ${pipe} tool with parameters: ${parametersString}` - : `Project "${projectName}"${parametersString ? ` with parameters: ${parametersString}` : ''}` - return ` -You are an expert SQL query generator that creates execution plans to answer: "${reformulatedQuestion}" - -Think step-by-step through the structured approach below. Be methodical and careful to ensure accuracy. - -# DATE AND CONTEXT -Today's date: ${date} -Current dashboard: ${dashboardDescription} -Segment ID: ${segmentId || 'not specified'} - -# YOUR TASK - -You must return instructions that describe the SQL query to execute. - -**INSTRUCTIONS STRUCTURE** -Your response must include an "instructions" field with a query string: -{ - "instructions": "SELECT ... FROM ... WHERE ..." // The complete SQL query to execute -} - -# CRITICAL TOOL USAGE RULES - -**list_datasources Tool:** -- Use ONCE at the beginning to understand available tables and schemas -- Study the schema carefully, noting column names and types -- Identify which tables contain the data you need - -**text_to_sql Tool:** -- Use with the natural language question to generate the SQL query -- Pass the user's question as the 'question' parameter -- Do NOT pass SQL code to this tool - it expects natural language questions only -- Build your understanding from schema first, then use text_to_sql -- You may need to refine your question and call this tool again based on validation results - -**execute_query Tool:** -- Use ONLY for VALIDATION after generating SQL with text_to_sql -- Add LIMIT 5 when validating to check the query works -- If it fails, refine your natural language question and try text_to_sql again -- The final returned SQL should have appropriate LIMIT (not the test LIMIT 5) -- CRITICAL: Do NOT include query results in your final JSON response - only the query string - -Remember: Think through the ENTIRE query before testing. Minimize iterations. 
- -# YOUR TASK - STRUCTURED APPROACH - -Follow this step-by-step process: - -**STEP 1: UNDERSTAND THE QUESTION AND READ SCHEMAS** -- Analyze what the user is asking for -- Use list_datasources to see available tables and schemas -- Study the schema carefully, noting column names and types -- Identify which tables are relevant based on the query -- Understand the available tables, columns, and relationships - -**STEP 2: GENERATE AND VALIDATE SQL QUERY** -- Use the text_to_sql tool with the reformulated question -- Pass the natural language question to the 'question' parameter -- Apply ALL query enhancement rules in your question formulation -- Ensure the question mentions segmentId filtering when applicable -- Ensure the question mentions timestamp filtering for time-based queries - -**STEP 3: VALIDATION** -- Use execute_query with the generated SQL (add LIMIT 5 for testing) -- If it succeeds: Proceed to return instructions with appropriate LIMIT -- If it fails: Refine your natural language question and try text_to_sql again -- Put maximum effort into getting it right with minimal iterations -- IMPORTANT: Validation results are NOT part of your final response - only use for verification - -**STEP 4: RETURN INSTRUCTIONS** -- Create the instructions with the validated SQL query -- Use appropriate LIMIT for final query (not the test LIMIT 5) -- CRITICAL: Return ONLY the SQL query string in "instructions" field - NO query results/data -- Your JSON response must contain ONLY: explanation and instructions (SQL query string) -- Provide a brief explanation of your query logic - -# QUERY ENHANCEMENT RULES - -**CORE PRINCIPLES:** -- For non-timeseries data, cap results at 20 unless explicitly specified -- Choose the sorting metric that makes the most sense based on the user's question -- Never return just IDs - always include names or human-readable identifiers -- Stay as close as possible to the user's request -- Single value queries should return a single row and skip null or 0 values - -**TIMESERIES DATA RULES:** -- If no time range specified: use year-to-date (YTD) as the default range -- If user asks for "YTD" or "year-to-date": use the date range from January 1st of the current year to today -- If time range specified: use appropriate granularity -- Always sort chronologically (oldest to newest) -- For trends/evolution queries: likely want cumulative data - -**FOLLOW-UP REQUEST CONSISTENCY:** -- Maintain the same time granularity as previous queries unless explicitly changed -- Preserve context from earlier queries (e.g., filters, groupings) +You need to generate a SQL query to answer: "${reformulatedQuestion}" + +Context: +- Date: ${date} +- Project: ${projectName} +- Segment ID: ${segmentId || 'not specified'} + +Instructions: +1. BRIEFLY use list_datasources to understand available tables +2. Optionally use 1-2 other tools for quick data exploration +3. THEN IMMEDIATELY write a SQL query that answers the question +4. You have maximum 3 steps - use them wisely +5. Your final response MUST contain the SQL query in markdown code block + +CRITICAL: After understanding the schema, stop using tools and write the SQL query! + +## CRITICAL ANTI-JOIN RULE +**For anti-join patterns (finding rows in A that don't exist in B), use LEFT JOIN with empty string check.** + +**IMPORTANT**: Tinybird has two key limitations: +1. NOT EXISTS with correlated subqueries is NOT supported +2. 
LEFT JOIN fills unmatched columns with default values (like '') instead of NULL + +Example - CORRECT (use LEFT JOIN + empty string check): +\`\`\`sql +WITH table_a_data AS ( + SELECT id, name FROM table_a WHERE condition +), +table_b_ids AS ( + SELECT DISTINCT a_id FROM table_b WHERE condition +) +SELECT a.id, a.name +FROM table_a_data a +LEFT JOIN table_b_ids b ON b.a_id = a.id +WHERE b.a_id = '' +\`\`\` + +Example - AVOID (NOT EXISTS - will fail in Tinybird): +\`\`\`sql +WHERE NOT EXISTS (SELECT 1 FROM table_b b WHERE b.a_id = a.id) +\`\`\` + +Example - AVOID (IS NULL check - will fail because Tinybird uses default values): +\`\`\`sql +LEFT JOIN table_b b ON b.a_id = a.id WHERE b.a_id IS NULL +\`\`\` # TINYBIRD SQL COMPLETE REFERENCE +## TINYBIRD LIMITATIONS +- **NOT EXISTS with correlated subqueries is completely unsupported** +- **LEFT JOIN fills unmatched columns with default values (like '') instead of NULL** +- **Use LEFT JOIN + empty string check (= '') for anti-join patterns, not IS NULL** +- **No semicolons allowed** - Queries must not end with semicolon +- **Multi-statements not allowed** - One query per request + ## ALLOWED SQL STATEMENTS - **ONLY SELECT statements are supported** - All SELECT clauses are fully supported: FROM, JOIN, WHERE, GROUP BY, ORDER BY, LIMIT, OFFSET, HAVING, WITH @@ -191,45 +147,5 @@ Note: Use lagInFrame() instead of LAG(), leadInFrame() instead of LEAD() JSONExtract(), JSONExtractString(), JSONExtractInt(), JSONExtractFloat(), JSONExtractBool(), JSONExtractArrayRaw(), JSONHas(), JSONLength() -## CRITICAL CONSTRAINTS -1. **NO subqueries in FROM clause** - Use JOINs instead -2. **LIMIT is recommended** - Always include LIMIT unless you need all results -3. **Aggregations require GROUP BY** - Include all non-aggregate columns -4. **SETTINGS clause** - Goes at the very end: SETTINGS join_use_nulls = 1 -5. **Table references** - Use database.schema.table format when available - -# CRITICAL REMINDERS - -1. **Tool Usage Discipline:** - - list_datasources: Use ONCE at the beginning to understand schema - - text_to_sql: Use with natural language questions (may need refinement) - - execute_query: Use for validation with LIMIT 5 - - Do NOT call text_to_sql with SQL code - only natural language questions - - Put maximum effort into formulating questions correctly to minimize iterations - -2. **Always Apply Filters:** - - segmentId filter on activityRelations_deduplicated_cleaned_ds when applicable - - timestamp filters for time-based queries - - Use provided parameters as defaults - -3. 
**Efficiency:** - - Understand schema first, then generate and validate SQL - - Minimize iterations by crafting good natural language questions - - Use validation to ensure query works before returning instructions - - Use the text_to_sql tool properly with natural language questions only - -**RESPONSE GUIDELINES - CRITICAL** -- Your JSON response must contain ONLY two fields: "explanation" and "instructions" -- "instructions" field must contain ONLY the SQL query string (no results, no data) -- "explanation" field must contain a brief explanation of your query selection -- NEVER include query results, validation data, or tool outputs in your JSON response -- The query will be executed separately - you only provide the query string - -IMPORTANT REMINDERS: -- Use list_datasources ONCE at the beginning to understand available tables -- Use text_to_sql with natural language questions (NOT SQL code) -- Use execute_query with LIMIT 5 to validate the generated SQL works -- Return the validated query in the instructions field (with appropriate LIMIT) -- Formulate questions to text_to_sql correctly to minimize validation iterations -- Put MAXIMUM effort into getting the question right with minimal iterations` +Focus on understanding the data schema first, then writing an effective SQL query.` } diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index 6466b8d87..d4246bfbe 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -52,15 +52,9 @@ export default defineNuxtConfig({ insightsDbWriteHost: 'localhost', insightsDbReadHost: 'localhost', insightsDbPort: 5432, - insightsDbUsername: "postgres", - insightsDbPassword: "example", - insightsDbDatabase: "insights", - cmDbWriteHost: "localhost", - cmDbReadHost: "localhost", - cmDbPort: 5432, - cmDbUsername: "postgres", - cmDbPassword: "example", - cmDbDatabase: "crowd-web", + insightsDbUsername: 'postgres', + insightsDbPassword: 'example', + insightsDbDatabase: 'insights', // These are also exposed on the client-side public: { apiBase: '/api', From c9cc1b32cc4a6c2babcab017da1640a86e26a32d Mon Sep 17 00:00:00 2001 From: anil Date: Mon, 15 Sep 2025 03:40:49 +0300 Subject: [PATCH 18/21] chore: readd cm related keys to nuxt config Signed-off-by: anil --- frontend/nuxt.config.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index d4246bfbe..20f8c144b 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -55,6 +55,12 @@ export default defineNuxtConfig({ insightsDbUsername: 'postgres', insightsDbPassword: 'example', insightsDbDatabase: 'insights', + cmDbWriteHost: "localhost", + cmDbReadHost: "localhost", + cmDbPort: 5432, + cmDbUsername: "postgres", + cmDbPassword: "example", + cmDbDatabase: "crowd-web", // These are also exposed on the client-side public: { apiBase: '/api', From 70f32e8a214ccc9027b53fc79db90202813a658d Mon Sep 17 00:00:00 2001 From: Efren Lim Date: Thu, 18 Sep 2025 16:17:04 +0800 Subject: [PATCH 19/21] chore: remove required pipe check Signed-off-by: Efren Lim --- frontend/server/api/chat/stream.ts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/frontend/server/api/chat/stream.ts b/frontend/server/api/chat/stream.ts index 22c812054..048891d51 100644 --- a/frontend/server/api/chat/stream.ts +++ b/frontend/server/api/chat/stream.ts @@ -25,15 +25,11 @@ export default defineEventHandler(async (event): Promise => { setHeader(event, 'X-Accel-Buffering', 'no') setHeader(event, 'Content-Type', 'text/plain; charset=utf-8') 
setHeader(event, 'Connection', 'close') - + try { const { messages, projectName, pipe, parameters, conversationId, projectSlug } = await readBody(event) - if (!pipe) { - return createError({ statusCode: 400, statusMessage: 'Pipe is required' }) - } - if (!projectSlug) { return createError({ statusCode: 400, statusMessage: 'Project slug is required' }) } @@ -69,7 +65,7 @@ export default defineEventHandler(async (event): Promise => { userEmail: event.context.user.email, dataStream, // Pass the dataStream to the class }) - } + }, }) } catch (error) { return createError({ From 5f7fe0209c62f4336bf57f352cb17ec5aa39b301 Mon Sep 17 00:00:00 2001 From: anilb Date: Wed, 1 Oct 2025 15:13:14 +0200 Subject: [PATCH 20/21] fix: some router improvements and tests Signed-off-by: anilb --- frontend/lib/chat/prompts/router.ts | 19 +- frontend/lib/chat/tests/router.test.ts | 311 +++++++++++++++++++++++++ 2 files changed, 325 insertions(+), 5 deletions(-) create mode 100644 frontend/lib/chat/tests/router.test.ts diff --git a/frontend/lib/chat/prompts/router.ts b/frontend/lib/chat/prompts/router.ts index 1b44b310a..0c339535b 100644 --- a/frontend/lib/chat/prompts/router.ts +++ b/frontend/lib/chat/prompts/router.ts @@ -41,16 +41,24 @@ ${toolsOverview} **MANDATORY: Before checking data sources, you MUST first verify if existing pipes can handle the query** **Step 1: Check Existing Tools (HIGHEST PRIORITY)** -- **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → ALWAYS use activities_count or activities_cumulative_count pipes** +- **FIRST: For activity-count-related queries (stars count, forks count, commits count, etc.) → Consider activities_count or activities_cumulative_count pipes** ${pipeToolQuestion} -- IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. - - For example, adding a country code parameter to a tool that doesn't support it is invalid. +- **MANDATORY VALIDATION: Before routing to pipes, verify the pipe can FULLY answer the question:** + - **Check dimensions/groupings:** Does the query ask for breakdowns the pipe doesn't support? + * Example: "commits by company" → activities_count cannot group by company → USE create_query + * Example: "stars by country" → activities_count cannot group by country → USE create_query + - **Check parameters:** Does the pipe accept all required parameters? + * IMPORTANT: Only the parameters listed in the tool's parameters are valid. You cannot add extra parameters. + * For example, adding a country code parameter to a tool that doesn't support it is invalid. + - **Check drilldowns:** Does the query need custom aggregations or drilldowns the pipe doesn't provide? + - **If a pipe EXISTS but CANNOT answer the specific question → route to "create_query" instead** - Can other available tools answer this question? - Can a combination of tools provide the answer? - **Can the SAME tool be used multiple times with different parameters to create comparisons?** - Example: activity tools can be called once for forks, once for stars to compare them - **When user refers to activities by their types, you can use activities_count or activities_cumulative_count pipes** - Activity types include: stars, forks, commits, pull requests, issues, etc. 
+ - BUT only if the query doesn't require custom dimensions/groupings (see validation above) - **CRITICAL: For comparative questions (e.g., "this week vs last week", "forks vs stars", "current vs previous period"):** - Check if the same tool can be called multiple times with different parameters (time ranges, activity types, etc.) - Even if the question asks for a comparison, if the underlying data can be fetched using existing tools, choose "pipes" @@ -58,7 +66,8 @@ ${pipeToolQuestion} * "active contributors this week vs last week" → use contributor tool twice with different date ranges * "cumulative forks vs stars last month" → use activity tools twice with different activity types * "active contributors vs organization this year" → active_contributors AND active_organization pipes -- **If ANY existing tool can provide the data → ALWAYS choose "pipes" action** +- **If existing tools can FULLY provide the data (including all dimensions and groupings) → choose "pipes" action** +- **If tools exist but cannot provide required dimensions/groupings/drilldowns → choose "create_query" action** **Step 2: Check Data Sources (only if Step 1 is NO)** - Use list_datasources to examine available tables and fields @@ -70,7 +79,7 @@ ${pipeToolQuestion} # ROUTING DECISIONS - "stop": The question cannot be answered with available data -- "create_query": Custom SQL query needed using available data sources +- "create_query": Custom SQL query needed using available data sources (tools can be empty) - "pipes": Existing tools can answer the question (specify which tools in the tools array) # IMPORTANT diff --git a/frontend/lib/chat/tests/router.test.ts b/frontend/lib/chat/tests/router.test.ts new file mode 100644 index 000000000..68645b4b8 --- /dev/null +++ b/frontend/lib/chat/tests/router.test.ts @@ -0,0 +1,311 @@ +// Copyright (c) 2025 The Linux Foundation and each contributor. 
+// SPDX-License-Identifier: MIT +/* eslint-disable @typescript-eslint/no-explicit-any */ +/** + * Router Agent Tests with Real AI Model Execution + * + * Tests router agent with actual Bedrock model and real Tinybird MCP tools + */ + +import { describe, test, expect, beforeAll, afterAll } from 'vitest' +import { createAmazonBedrock, type AmazonBedrockProvider } from '@ai-sdk/amazon-bedrock' +import { experimental_createMCPClient as createMCPClient, type LanguageModelV1 } from 'ai' +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' + +import { RouterAgent } from '../agents/router' +import { RouterDecisionAction } from '../enums' +import type { ChatMessage, RouterAgentInput } from '../types' + +let bedrock: AmazonBedrockProvider | null = null + +describe('Router Agent', () => { + let model: LanguageModelV1 + let mcpClient: any + let tbTools: Record = {} + let toolsOverview: string = '' + + beforeAll(async () => { + // Check if we have the required environment variables + const tinybirdToken = process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN + const tinybirdBaseUrl = process.env.NUXT_TINYBIRD_BASE_URL + const hasAwsCredentials = process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID + + if (!tinybirdToken || !tinybirdBaseUrl || !hasAwsCredentials) { + console.warn('⚠️ Skipping real integration tests - missing credentials') + console.warn( + 'Required: NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN, NUXT_TINYBIRD_BASE_URL, AWS Bedrock credentials', + ) + return + } + + // Initialize AWS Bedrock model exactly like DataCopilot + bedrock = createAmazonBedrock({ + accessKeyId: process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID, + secretAccessKey: process.env.NUXT_AWS_BEDROCK_SECRET_ACCESS_KEY, + region: process.env.NUXT_AWS_BEDROCK_REGION, + }) + + // Initialize model once, like DataCopilot does in constructor + const BEDROCK_MODEL_ID = 'us.anthropic.claude-sonnet-4-20250514-v1:0' + model = bedrock(BEDROCK_MODEL_ID) + + // Initialize MCP client to get real tools - same as DataCopilot + const tbMcpUrl = `https://mcp.tinybird.co?token=${tinybirdToken}&host=${tinybirdBaseUrl}` + const url = new URL(tbMcpUrl) + + try { + mcpClient = await createMCPClient({ + transport: new StreamableHTTPClientTransport(url, { + sessionId: `test_session_${Date.now()}`, + }), + }) + + // Load real tools from Tinybird MCP + tbTools = await mcpClient.tools({}) + buildToolsOverview() + + console.warn(`✅ Connected to Tinybird MCP - ${Object.keys(tbTools).length} tools loaded`) + } catch (error) { + console.error('❌ Failed to connect to Tinybird MCP:', error) + throw error + } + }, 30000) + + afterAll(async () => { + if (mcpClient) { + try { + await mcpClient.close?.() + } catch (error) { + console.warn('Warning: Could not close MCP client:', error) + } + } + }) + + // Build tools overview exactly like DataCopilot does + function buildToolsOverview(): void { + const excludedFromOverview = new Set([ + 'explore_data', + 'execute_query', + 'text_to_sql', + 'list_endpoints', + 'list_service_datasources', + ]) + + const toolDescriptions: string[] = [] + for (const [toolName, tool] of Object.entries(tbTools)) { + if (excludedFromOverview.has(toolName)) continue + + const description = tool.description || tool.meta?.description || 'No description available' + toolDescriptions.push(`- ${toolName}: ${description}`) + } + + toolsOverview = toolDescriptions.join('\n') + } + + function createTestInput(userQuery: string): RouterAgentInput { + const messages: ChatMessage[] = [{ role: 'user', content: 
userQuery }] + + console.warn("📝 Creating test input for query:", userQuery) + + return { + model, + messages, + tools: tbTools, + toolsOverview, + date: new Date().toISOString().slice(0, 10), + projectName: 'test-project', + pipe: 'test-pipe', + parametersString: '{}', + segmentId: 'test-segment', + } + } + + function skipIfNoCredentials() { + const hasCredentials = + process.env.NUXT_INSIGHTS_DATA_COPILOT_TINYBIRD_TOKEN && + process.env.NUXT_TINYBIRD_BASE_URL && + process.env.NUXT_AWS_BEDROCK_ACCESS_KEY_ID + + if (!hasCredentials) { + console.warn('Skipping test - missing credentials') + return true + } + return false + } + + describe('Basic functionality', () => { + test('should create original router agent successfully', () => { + const router = new RouterAgent() + expect(router).toBeDefined() + expect(router.name).toBe('Router') + expect(router.temperature).toBe(0) + }) + + + test('should validate output schema for both implementations', () => { + const originalRouter = new RouterAgent() + + const validOutput = { + next_action: RouterDecisionAction.PIPES, + reasoning: 'Test reasoning', + reformulated_question: 'Test question', + tools: ['activities_count'], + } + + // Both should use the same schema + expect(originalRouter.outputSchema.safeParse(validOutput).success).toBe(true) + }) + + test('should reject invalid output for both implementations', () => { + const originalRouter = new RouterAgent() + + const invalidOutput = { + next_action: 'INVALID_ACTION', + reasoning: 'Test reasoning', + } + + // Both should reject invalid output + expect(originalRouter.outputSchema.safeParse(invalidOutput).success).toBe(false) + }) + + test('should connect to MCP and load tools', () => { + if (skipIfNoCredentials()) return + + expect(Object.keys(tbTools).length).toBeGreaterThan(0) + expect(tbTools.list_datasources).toBeDefined() + expect(toolsOverview).toContain('activities') + }) + }) + + describe('Real AI routing decisions', () => { + describe('PIPES routing', () => { + test('should route activity queries correctly to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me commits this week') + + console.warn('🤖 Sending query to router agent: Show me commits this week') + const result = await router.execute(input) + console.warn('🤖 Router agent response:', result) + + expect(result.next_action).toBeDefined() + expect(Object.values(RouterDecisionAction)).toContain(result.next_action) + expect(RouterDecisionAction.PIPES).toBe(result.next_action) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + expect(result.usage.totalTokens).toBeGreaterThan(0) + + console.warn(`🔍 Activity query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route stars query for previous week to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me stars for the previous week') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Stars query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route forks query for last year to PIPE', 
async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me forks for the last year') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Forks query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route companies contributing query to PIPE', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('List of companies contributing in project') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.PIPES) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + expect(Array.isArray(result.tools)).toBe(true) + + console.warn(`🔍 Companies query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + + test('should route geographical queries correctly', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me contributors from Brazil') + + const result = await router.execute(input) + + expect(result.next_action).toBeDefined() + expect(Object.values(RouterDecisionAction)).toContain(result.next_action) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toContain('Brazil') + + console.warn(`🔍 Geographic query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + describe('CREATE_QUERY (TEXT_TO_SQL) routing', () => { + test('should route commit activity by company query to CREATE_QUERY', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput('Show me commit activity by company over all time period') + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.CREATE_QUERY) + expect(result.reasoning).toBeTruthy() + expect(result.reformulated_question).toBeTruthy() + + console.warn(`🔍 Commit activity by company query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + describe('STOP routing', () => { + test('should route impossible queries to STOP', async () => { + if (skipIfNoCredentials()) return + + const router = new RouterAgent() + const input = createTestInput("What's the weather forecast for contributors?") + + const result = await router.execute(input) + + expect(result.next_action).toBe(RouterDecisionAction.STOP) + expect(result.reasoning).toContain('weather') + expect(result.tools).toEqual([]) + + console.warn(`🔍 Weather query routed to: ${result.next_action}`) + console.warn(`🔍 Reasoning: ${result.reasoning}`) + }, 15000) + }) + + }) + +}) From 5d113ec45d4422382822c6854190272f7ee171b0 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 9 Oct 2025 10:04:30 +0200 Subject: [PATCH 21/21] feat: data copilot improvements (#726) Signed-off-by: anilb --- ..._addAskClarificationRouterResponseType.sql | 9 + ...411__createChatResponseAgentStepsTable.sql | 27 + ...V1759927412__makeChatResponsesNullable.sql | 18 + .../components/chat-history/chat-result.vue | 33 - .../components/results/results-section.vue | 13 +- .../copilot/store/copilot.api.service.ts | 48 +- 
.../shared/modules/copilot/store/test.json | 60 - .../shared/modules/copilot/store/test2.json | 378 ------ .../shared/modules/copilot/store/test3.json | 264 ----- .../modules/copilot/types/copilot.types.ts | 9 +- frontend/lib/chat/agents/auditor.ts | 43 + frontend/lib/chat/agents/base-agent.ts | 4 +- frontend/lib/chat/agents/index.ts | 3 +- frontend/lib/chat/agents/router.ts | 8 +- frontend/lib/chat/agents/text-to-sql.ts | 34 +- frontend/lib/chat/data-copilot.ts | 1009 ++++++++++++++--- frontend/lib/chat/enums.ts | 10 + frontend/lib/chat/prompts/auditor.ts | 159 +++ frontend/lib/chat/prompts/pipe.ts | 13 +- frontend/lib/chat/prompts/router.ts | 149 ++- frontend/lib/chat/prompts/text-to-sql.ts | 258 +++-- .../lib/chat/prompts/tinybird-functions.md | 78 ++ .../lib/chat/prompts/tinybird-patterns.md | 135 +++ frontend/lib/chat/tests/auditor.test.ts | 187 +++ frontend/lib/chat/tests/router.test.ts | 215 ++-- frontend/lib/chat/types.ts | 41 +- frontend/lib/chat/utils/data-summary.ts | 135 +++ frontend/nuxt.config.ts | 1 + frontend/server/api/chat/stream.ts | 13 +- frontend/server/middleware/database.ts | 13 +- frontend/server/repo/chat.repo.ts | 148 ++- 31 files changed, 2306 insertions(+), 1209 deletions(-) create mode 100644 database/migrations/V1759392166__addAskClarificationRouterResponseType.sql create mode 100644 database/migrations/V1759927411__createChatResponseAgentStepsTable.sql create mode 100644 database/migrations/V1759927412__makeChatResponsesNullable.sql delete mode 100644 frontend/app/components/shared/modules/copilot/store/test.json delete mode 100644 frontend/app/components/shared/modules/copilot/store/test2.json delete mode 100644 frontend/app/components/shared/modules/copilot/store/test3.json create mode 100644 frontend/lib/chat/agents/auditor.ts create mode 100644 frontend/lib/chat/prompts/auditor.ts create mode 100644 frontend/lib/chat/prompts/tinybird-functions.md create mode 100644 frontend/lib/chat/prompts/tinybird-patterns.md create mode 100644 frontend/lib/chat/tests/auditor.test.ts create mode 100644 frontend/lib/chat/utils/data-summary.ts diff --git a/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql b/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql new file mode 100644 index 000000000..84fb0e825 --- /dev/null +++ b/database/migrations/V1759392166__addAskClarificationRouterResponseType.sql @@ -0,0 +1,9 @@ +-- Drop the existing check constraint +ALTER TABLE chat_responses DROP CONSTRAINT chat_responses_router_response_check; + +-- Add the new check constraint with 'ask_clarification' +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IN ('pipes', 'create_query', 'stop', 'ask_clarification')); + +-- Add clarification_question column to store the clarification question +ALTER TABLE chat_responses ADD COLUMN IF NOT EXISTS clarification_question TEXT; diff --git a/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql b/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql new file mode 100644 index 000000000..e1cc71511 --- /dev/null +++ b/database/migrations/V1759927411__createChatResponseAgentStepsTable.sql @@ -0,0 +1,27 @@ +-- Create enum for agent types +CREATE TYPE agent_type AS ENUM ('ROUTER', 'PIPE', 'TEXT_TO_SQL', 'AUDITOR', 'CHART', 'EXECUTE_INSTRUCTIONS'); + +-- Create table to track individual agent execution steps +CREATE TABLE IF NOT EXISTS chat_response_agent_steps ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + 
chat_response_id UUID NOT NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + model TEXT, + agent agent_type NOT NULL, + response JSONB, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + response_time_seconds NUMERIC NOT NULL DEFAULT 0, + instructions TEXT, + error_message TEXT, + + CONSTRAINT fk_chat_response + FOREIGN KEY (chat_response_id) + REFERENCES chat_responses(id) + ON DELETE CASCADE +); + +-- Create indexes for efficient querying +CREATE INDEX idx_agent_steps_chat_response_id ON chat_response_agent_steps(chat_response_id); +CREATE INDEX idx_agent_steps_created_at ON chat_response_agent_steps(created_at DESC); +CREATE INDEX idx_agent_steps_agent_type ON chat_response_agent_steps(agent); diff --git a/database/migrations/V1759927412__makeChatResponsesNullable.sql b/database/migrations/V1759927412__makeChatResponsesNullable.sql new file mode 100644 index 000000000..d94efe725 --- /dev/null +++ b/database/migrations/V1759927412__makeChatResponsesNullable.sql @@ -0,0 +1,18 @@ +-- Make router fields nullable to allow early creation of chat_responses +ALTER TABLE chat_responses ALTER COLUMN router_response DROP NOT NULL; +ALTER TABLE chat_responses ALTER COLUMN router_reason DROP NOT NULL; + +-- Drop existing constraints +ALTER TABLE chat_responses DROP CONSTRAINT IF EXISTS chat_responses_router_response_check; +ALTER TABLE chat_responses DROP CONSTRAINT IF EXISTS check_pipe_instructions; + +-- Add new constraint that allows NULL or valid enum values +ALTER TABLE chat_responses ADD CONSTRAINT chat_responses_router_response_check + CHECK (router_response IS NULL OR router_response IN ('pipes', 'create_query', 'stop', 'ask_clarification')); + +-- Recreate pipe_instructions check with NULL handling +ALTER TABLE chat_responses ADD CONSTRAINT check_pipe_instructions CHECK ( + router_response IS NULL OR + (router_response = 'pipes' AND pipe_instructions IS NOT NULL) OR + (router_response != 'pipes' AND pipe_instructions IS NULL) +); diff --git a/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue b/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue index 495888bd6..b6c03b183 100644 --- a/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue +++ b/frontend/app/components/shared/modules/copilot/components/chat-history/chat-result.vue @@ -24,33 +24,11 @@ SPDX-License-Identifier: MIT > {{ reasoning }} -
{{ message.content }}