diff --git a/app/api/agent/route.ts b/app/api/agent/route.ts index 0cffea9..7a94a04 100644 --- a/app/api/agent/route.ts +++ b/app/api/agent/route.ts @@ -1,4 +1,4 @@ -import { NextResponse } from 'next/server'; +import { NextResponse } from "next/server"; import { openai } from "@ai-sdk/openai"; import { CoreMessage, generateObject, UserContent } from "ai"; import { z } from "zod"; @@ -19,7 +19,15 @@ async function runStagehand({ instruction, }: { sessionID: string; - method: "GOTO" | "ACT" | "EXTRACT" | "CLOSE" | "SCREENSHOT" | "OBSERVE" | "WAIT" | "NAVBACK"; + method: + | "GOTO" + | "ACT" + | "EXTRACT" + | "CLOSE" + | "SCREENSHOT" + | "OBSERVE" + | "WAIT" + | "NAVBACK"; instruction?: string; }) { const stagehand = new Stagehand({ @@ -97,20 +105,23 @@ async function sendPrompt({ try { const stagehand = new Stagehand({ browserbaseSessionID: sessionID, - env: "BROWSERBASE" + env: "BROWSERBASE", }); await stagehand.init(); currentUrl = await stagehand.page.url(); await stagehand.close(); } catch (error) { - console.error('Error getting page info:', error); + console.error("Error getting page info:", error); } const content: UserContent = [ { type: "text", - text: `Consider the following screenshot of a web page${currentUrl ? ` (URL: ${currentUrl})` : ''}, with the goal being "${goal}". -${previousSteps.length > 0 + text: `Consider the following screenshot of a web page${ + currentUrl ? ` (URL: ${currentUrl})` : "" + }, with the goal being "${goal}". +${ + previousSteps.length > 0 ? `Previous steps taken: ${previousSteps .map( @@ -141,7 +152,10 @@ If the goal has been achieved, return "close".`, ]; // Add screenshot if navigated to a page previously - if (previousSteps.length > 0 && previousSteps.some((step) => step.tool === "GOTO")) { + if ( + previousSteps.length > 0 && + previousSteps.some((step) => step.tool === "GOTO") + ) { content.push({ type: "image", image: (await runStagehand({ @@ -193,32 +207,34 @@ If the goal has been achieved, return "close".`, async function selectStartingUrl(goal: string) { const message: CoreMessage = { role: "user", - content: [{ - type: "text", - text: `Given the goal: "${goal}", determine the best URL to start from. + content: [ + { + type: "text", + text: `Given the goal: "${goal}", determine the best URL to start from. Choose from: 1. A relevant search engine (Google, Bing, etc.) 2. A direct URL if you're confident about the target website 3. Any other appropriate starting point -Return a URL that would be most effective for achieving this goal.` - }] +Return a URL that would be most effective for achieving this goal.`, + }, + ], }; const result = await generateObject({ model: LLMClient, schema: z.object({ url: z.string().url(), - reasoning: z.string() + reasoning: z.string(), }), - messages: [message] + messages: [message], }); return result.object; } export async function GET() { - return NextResponse.json({ message: 'Agent API endpoint ready' }); + return NextResponse.json({ message: "Agent API endpoint ready" }); } export async function POST(request: Request) { @@ -228,17 +244,17 @@ export async function POST(request: Request) { if (!sessionId) { return NextResponse.json( - { error: 'Missing sessionId in request body' }, + { error: "Missing sessionId in request body" }, { status: 400 } ); } // Handle different action types switch (action) { - case 'START': { + case "START": { if (!goal) { return NextResponse.json( - { error: 'Missing goal in request body' }, + { error: "Missing goal in request body" }, { status: 400 } ); } @@ -249,27 +265,21 @@ export async function POST(request: Request) { text: `Navigating to ${url}`, reasoning, tool: "GOTO" as const, - instruction: url + instruction: url, }; - - await runStagehand({ - sessionID: sessionId, - method: "GOTO", - instruction: url - }); - return NextResponse.json({ + return NextResponse.json({ success: true, result: firstStep, - steps: [firstStep], - done: false + steps: [], + done: false, }); } - case 'GET_NEXT_STEP': { + case "GET_NEXT_STEP": { if (!goal) { return NextResponse.json( - { error: 'Missing goal in request body' }, + { error: "Missing goal in request body" }, { status: 400 } ); } @@ -285,15 +295,15 @@ export async function POST(request: Request) { success: true, result, steps: newPreviousSteps, - done: result.tool === "CLOSE" + done: result.tool === "CLOSE", }); } - case 'EXECUTE_STEP': { + case "EXECUTE_STEP": { const { step } = body; if (!step) { return NextResponse.json( - { error: 'Missing step in request body' }, + { error: "Missing step in request body" }, { status: 400 } ); } @@ -308,21 +318,21 @@ export async function POST(request: Request) { return NextResponse.json({ success: true, extraction, - done: step.tool === "CLOSE" + done: step.tool === "CLOSE", }); } default: return NextResponse.json( - { error: 'Invalid action type' }, + { error: "Invalid action type" }, { status: 400 } ); } } catch (error) { - console.error('Error in agent endpoint:', error); + console.error("Error in agent endpoint:", error); return NextResponse.json( - { success: false, error: 'Failed to process request' }, + { success: false, error: "Failed to process request" }, { status: 500 } ); } -} \ No newline at end of file +} diff --git a/app/components/ChatFeed.tsx b/app/components/ChatFeed.tsx index 6f9c812..26a02df 100644 --- a/app/components/ChatFeed.tsx +++ b/app/components/ChatFeed.tsx @@ -32,6 +32,10 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { const initializationRef = useRef(false); const chatContainerRef = useRef(null); const [isAgentFinished, setIsAgentFinished] = useState(false); + const [pendingStep, setPendingStep] = useState<{ + step: BrowserStep; + sessionId: string; + } | null>(null); const agentStateRef = useRef({ sessionId: null, @@ -57,6 +61,62 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { } }, []); + const updateNextStep = useCallback(async () => { + try { + if (!agentStateRef.current.sessionId) { + console.error("No session ID available"); + return; + } + + const body = { + goal: initialMessage, + sessionId: agentStateRef.current.sessionId, + previousSteps: agentStateRef.current.steps, + action: "GET_NEXT_STEP", + }; + + const nextStepResponse = await fetch("/api/agent", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + const nextStepData = await nextStepResponse.json(); + + if (!nextStepData.success) { + console.error("Next step error:", nextStepData); + throw new Error(nextStepData.error || "Failed to get next step"); + } + + const nextStep = { + ...nextStepData.result, + stepNumber: agentStateRef.current.steps.length + 1, + }; + + const steps = [...agentStateRef.current.steps, nextStep]; + + agentStateRef.current = { + ...agentStateRef.current, + steps, + }; + + setUiState((prev) => ({ + ...prev, + steps, + })); + + setPendingStep({ + step: nextStep, + sessionId: agentStateRef.current.sessionId!, + }); + console.log("Pending step:", nextStep); + } catch (error) { + console.error("Error getting next step:", error); + } + }, [initialMessage]); + useEffect(() => { if ( uiState.steps.length > 0 && @@ -79,6 +139,55 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { scrollToBottom(); }, [uiState.steps, scrollToBottom]); + const handleExecuteStep = async () => { + if (!pendingStep) return; + + // Clear the pending step before getting the next one + const pendingStepCopy = pendingStep; + setPendingStep(null); + + try { + const executeResponse = await fetch("/api/agent", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + sessionId: pendingStepCopy.sessionId, + step: pendingStepCopy.step, + action: "EXECUTE_STEP", + }), + }); + + const executeData = await executeResponse.json(); + + if (!executeData.success) { + throw new Error("Failed to execute step"); + } + + // Add a small delay to ensure the current step is processed + setTimeout(() => { + updateNextStep(); + }, 500); + } catch (error) { + console.error("Error executing step:", error); + } + }; + + // Add event listener for tab key + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Tab") { + e.preventDefault(); // Prevent default tab behavior + // handleExecuteStep(); + alert("Tab key pressed"); + } + }; + + window.addEventListener("keydown", handleKeyDown); + return () => window.removeEventListener("keydown", handleKeyDown); + }, []); + useEffect(() => { console.log("useEffect called"); const initializeSession = async () => { @@ -136,7 +245,7 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { const data = await response.json(); if (data.success) { - const newStep = { + const firstStep = { text: data.result.text, reasoning: data.result.reasoning, tool: data.result.tool, @@ -146,80 +255,18 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { agentStateRef.current = { ...agentStateRef.current, - steps: [newStep], + steps: [firstStep], }; setUiState((prev) => ({ ...prev, - steps: [newStep], + steps: [firstStep], })); - // Continue with subsequent steps - while (true) { - // Get next step from LLM - const nextStepResponse = await fetch("/api/agent", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - goal: initialMessage, - sessionId: sessionData.sessionId, - previousSteps: agentStateRef.current.steps, - action: "GET_NEXT_STEP", - }), - }); - - const nextStepData = await nextStepResponse.json(); - - if (!nextStepData.success) { - throw new Error("Failed to get next step"); - } - - // Add the next step to UI immediately after receiving it - const nextStep = { - ...nextStepData.result, - stepNumber: agentStateRef.current.steps.length + 1, - }; - - agentStateRef.current = { - ...agentStateRef.current, - steps: [...agentStateRef.current.steps, nextStep], - }; - - setUiState((prev) => ({ - ...prev, - steps: agentStateRef.current.steps, - })); - - // Break after adding the CLOSE step to UI - if (nextStepData.done || nextStepData.result.tool === "CLOSE") { - break; - } - - // Execute the step - const executeResponse = await fetch("/api/agent", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - sessionId: sessionData.sessionId, - step: nextStepData.result, - action: "EXECUTE_STEP", - }), - }); - - const executeData = await executeResponse.json(); - - if (!executeData.success) { - throw new Error("Failed to execute step"); - } - - if (executeData.done) { - break; - } - } + setPendingStep({ + step: firstStep, + sessionId: sessionData.sessionId, + }); } } catch (error) { console.error("Session initialization error:", error); @@ -230,7 +277,7 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { }; initializeSession(); - }, [initialMessage]); + }, [initialMessage, updateNextStep]); // Spring configuration for smoother animations const springConfig = { @@ -377,7 +424,11 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) {
@@ -392,6 +443,20 @@ export default function ChatFeed({ initialMessage, onClose }: ChatFeedProps) { Reasoning: {step.reasoning}

+ {step.tool === "CLOSE" ? ( + + ) : ( + pendingStep?.step.stepNumber === step.stepNumber && ( + + ) + )} ))} {isLoading && (