diff --git a/src/__tests__/fixture-loader.test.ts b/src/__tests__/fixture-loader.test.ts index b904536..94aef38 100644 --- a/src/__tests__/fixture-loader.test.ts +++ b/src/__tests__/fixture-loader.test.ts @@ -844,4 +844,75 @@ describe("validateFixtures", () => { true, ); }); + + it("error: reasoning is not a string", () => { + const fixtures = [makeFixture({ response: { content: "hi", reasoning: 123 } as never })]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "error" && r.message.includes("reasoning must be a string"), + ), + ).toBe(true); + }); + + it("warning: reasoning is empty string", () => { + const fixtures = [makeFixture({ response: { content: "hi", reasoning: "" } })]; + const results = validateFixtures(fixtures); + expect( + results.some((r) => r.severity === "warning" && r.message.includes("reasoning is empty")), + ).toBe(true); + }); + + it("error: webSearches is not an array", () => { + const fixtures = [ + makeFixture({ response: { content: "hi", webSearches: "not-array" } as never }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "error" && r.message.includes("webSearches must be an array"), + ), + ).toBe(true); + }); + + it("error: webSearches element is not a string", () => { + const fixtures = [ + makeFixture({ response: { content: "hi", webSearches: ["valid", 42] } as never }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "error" && r.message.includes("webSearches[1] is not a string"), + ), + ).toBe(true); + }); + + it("accepts valid reasoning and webSearches", () => { + const fixtures = [ + makeFixture({ + response: { content: "hi", reasoning: "thinking...", webSearches: ["query1", "query2"] }, + }), + ]; + expect(validateFixtures(fixtures)).toEqual([]); + }); + + it("warning: webSearches is empty array", () => { + const fixtures = [makeFixture({ response: { content: "hi", webSearches: [] } })]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "warning" && r.message.includes("webSearches is empty array"), + ), + ).toBe(true); + }); + + it("warning: webSearches element is empty string", () => { + const fixtures = [makeFixture({ response: { content: "hi", webSearches: ["valid", ""] } })]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "warning" && r.message.includes("webSearches[1] is empty string"), + ), + ).toBe(true); + }); }); diff --git a/src/__tests__/reasoning-web-search.test.ts b/src/__tests__/reasoning-web-search.test.ts new file mode 100644 index 0000000..93cf588 --- /dev/null +++ b/src/__tests__/reasoning-web-search.test.ts @@ -0,0 +1,551 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +interface SSEEvent { + type: string; + [key: string]: unknown; +} + +function parseResponsesSSEEvents(body: string): SSEEvent[] { + const events: SSEEvent[] = []; + const lines = body.split("\n"); + for (const line of lines) { + if (line.startsWith("data: ")) { + events.push(JSON.parse(line.slice(6)) as SSEEvent); + } + } + return events; +} + +const parseClaudeSSEEvents = parseResponsesSSEEvents; + +// --- fixtures --- + +const reasoningFixture: Fixture = { + match: { userMessage: "think" }, + response: { + content: "The answer is 42.", + reasoning: "Let me think step by step about this problem.", + }, +}; + +const webSearchFixture: Fixture = { + match: { userMessage: "search" }, + response: { + content: "Here are the results.", + webSearches: ["latest news", "weather forecast"], + }, +}; + +const combinedFixture: Fixture = { + match: { userMessage: "combined" }, + response: { + content: "Based on my analysis and research.", + reasoning: "I need to reason through this carefully.", + webSearches: ["relevant data"], + }, +}; + +const plainFixture: Fixture = { + match: { userMessage: "plain" }, + response: { content: "Just plain text." }, +}; + +const allFixtures: Fixture[] = [reasoningFixture, webSearchFixture, combinedFixture, plainFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── Responses API: Reasoning events ───────────────────────────────────────── + +describe("POST /v1/responses (reasoning streaming)", () => { + it("emits reasoning events before text events", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseResponsesSSEEvents(res.body); + const types = events.map((e) => e.type); + + expect(types).toContain("response.output_item.added"); + expect(types).toContain("response.reasoning_summary_part.added"); + expect(types).toContain("response.reasoning_summary_text.delta"); + expect(types).toContain("response.reasoning_summary_text.done"); + expect(types).toContain("response.reasoning_summary_part.done"); + + const reasoningDoneIdx = types.indexOf("response.reasoning_summary_text.done"); + const firstTextDelta = types.indexOf("response.output_text.delta"); + expect(reasoningDoneIdx).toBeLessThan(firstTextDelta); + }); + + it("reasoning deltas reconstruct full reasoning text", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "think" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const reasoningDeltas = events.filter( + (e) => e.type === "response.reasoning_summary_text.delta", + ); + const fullReasoning = reasoningDeltas.map((d) => d.delta).join(""); + expect(fullReasoning).toBe("Let me think step by step about this problem."); + }); + + it("text deltas still reconstruct full content", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "think" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const textDeltas = events.filter((e) => e.type === "response.output_text.delta"); + const fullText = textDeltas.map((d) => d.delta).join(""); + expect(fullText).toBe("The answer is 42."); + }); + + it("response.completed includes reasoning output item", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "think" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const completed = events.find((e) => e.type === "response.completed") as SSEEvent & { + response: { output: { type: string }[] }; + }; + expect(completed).toBeDefined(); + expect(completed.response.output.length).toBeGreaterThanOrEqual(2); + expect(completed.response.output[0].type).toBe("reasoning"); + expect(completed.response.output[completed.response.output.length - 1].type).toBe("message"); + }); +}); + +// ─── Responses API: Web search events ──────────────────────────────────────── + +describe("POST /v1/responses (web search streaming)", () => { + it("emits web search events before text events", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "search" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseResponsesSSEEvents(res.body); + const types = events.map((e) => e.type); + + const searchAddedEvents = events.filter( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(searchAddedEvents).toHaveLength(2); + + const searchDoneEvents = events.filter( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(searchDoneEvents).toHaveLength(2); + + const lastSearchDoneIdx = events.reduce( + (acc, e, idx) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call" + ? idx + : acc, + -1, + ); + const firstTextDelta = types.indexOf("response.output_text.delta"); + expect(lastSearchDoneIdx).toBeLessThan(firstTextDelta); + }); + + it("web search items contain query strings", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "search" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const searchDone = events.filter( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call", + ) as (SSEEvent & { item: { query: string } })[]; + + expect(searchDone[0].item.query).toBe("latest news"); + expect(searchDone[1].item.query).toBe("weather forecast"); + }); + + it("response.completed includes web search output items", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "search" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const completed = events.find((e) => e.type === "response.completed") as SSEEvent & { + response: { output: { type: string; query?: string }[] }; + }; + expect(completed).toBeDefined(); + + const searchOutputs = completed.response.output.filter((o) => o.type === "web_search_call"); + expect(searchOutputs).toHaveLength(2); + expect(searchOutputs[0].query).toBe("latest news"); + expect(searchOutputs[1].query).toBe("weather forecast"); + }); +}); + +// ─── Responses API: Combined reasoning + web search + text ─────────────────── + +describe("POST /v1/responses (combined reasoning + web search)", () => { + it("emits reasoning, then web search, then text events in order", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "combined" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseResponsesSSEEvents(res.body); + const types = events.map((e) => e.type); + + expect(types).toContain("response.reasoning_summary_text.delta"); + + const webSearchAdded = events.filter( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(webSearchAdded).toHaveLength(1); + + expect(types).toContain("response.output_text.delta"); + + const reasoningDoneIdx = types.indexOf("response.reasoning_summary_text.done"); + const firstWebSearch = events.findIndex( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "web_search_call", + ); + const firstTextDelta = types.indexOf("response.output_text.delta"); + + expect(reasoningDoneIdx).toBeLessThan(firstWebSearch); + expect(firstWebSearch).toBeLessThan(firstTextDelta); + }); + + it("response.completed output includes all item types in order", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "combined" }], + stream: true, + }); + + const events = parseResponsesSSEEvents(res.body); + const completed = events.find((e) => e.type === "response.completed") as SSEEvent & { + response: { output: { type: string }[] }; + }; + expect(completed).toBeDefined(); + + const outputTypes = completed.response.output.map((o) => o.type); + expect(outputTypes).toEqual(["reasoning", "web_search_call", "message"]); + }); +}); + +// ─── Responses API: Non-streaming with reasoning ───────────────────────────── + +describe("POST /v1/responses (non-streaming with reasoning)", () => { + it("includes reasoning output item in non-streaming response", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.object).toBe("response"); + expect(body.status).toBe("completed"); + + expect(body.output.length).toBeGreaterThanOrEqual(2); + expect(body.output[0].type).toBe("reasoning"); + expect(body.output[0].summary[0].text).toBe("Let me think step by step about this problem."); + expect(body.output[body.output.length - 1].type).toBe("message"); + expect(body.output[body.output.length - 1].content[0].text).toBe("The answer is 42."); + }); + + it("includes web search output items in non-streaming response", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "search" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + + const searchOutputs = body.output.filter((o: { type: string }) => o.type === "web_search_call"); + expect(searchOutputs).toHaveLength(2); + expect(searchOutputs[0].query).toBe("latest news"); + expect(searchOutputs[1].query).toBe("weather forecast"); + }); + + it("combined non-streaming response has correct output order", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "combined" }], + stream: false, + }); + + const body = JSON.parse(res.body); + const outputTypes = body.output.map((o: { type: string }) => o.type); + expect(outputTypes).toEqual(["reasoning", "web_search_call", "message"]); + }); +}); + +// ─── Responses API: Plain text still works ─────────────────────────────────── + +describe("POST /v1/responses (backward compatibility)", () => { + it("plain text fixture works without reasoning or web search", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "plain" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseResponsesSSEEvents(res.body); + const types = events.map((e) => e.type); + + expect(types).not.toContain("response.reasoning_summary_text.delta"); + + const webSearchEvents = events.filter( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(webSearchEvents).toHaveLength(0); + + const deltas = events.filter((e) => e.type === "response.output_text.delta"); + const fullText = deltas.map((d) => d.delta).join(""); + expect(fullText).toBe("Just plain text."); + }); + + it("plain text non-streaming response has no extra output items", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.output).toHaveLength(1); + expect(body.output[0].type).toBe("message"); + }); +}); + +// ─── Anthropic Claude: Thinking blocks ─────────────────────────────────────── + +describe("POST /v1/messages (thinking blocks streaming)", () => { + it("emits thinking block before text block when reasoning is present", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseClaudeSSEEvents(res.body); + const types = events.map((e) => e.type); + + expect(types[0]).toBe("message_start"); + + const blockStarts = events.filter((e) => e.type === "content_block_start"); + expect(blockStarts).toHaveLength(2); + + const thinkingBlock = blockStarts[0] as SSEEvent & { + index: number; + content_block: { type: string }; + }; + expect(thinkingBlock.index).toBe(0); + expect(thinkingBlock.content_block.type).toBe("thinking"); + + const textBlock = blockStarts[1] as SSEEvent & { + index: number; + content_block: { type: string }; + }; + expect(textBlock.index).toBe(1); + expect(textBlock.content_block.type).toBe("text"); + }); + + it("thinking deltas reconstruct full reasoning text", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + const events = parseClaudeSSEEvents(res.body); + const thinkingDeltas = events.filter( + (e) => + e.type === "content_block_delta" && + (e.delta as { type: string })?.type === "thinking_delta", + ) as (SSEEvent & { delta: { thinking: string } })[]; + + expect(thinkingDeltas.length).toBeGreaterThan(0); + const fullThinking = thinkingDeltas.map((d) => d.delta.thinking).join(""); + expect(fullThinking).toBe("Let me think step by step about this problem."); + }); + + it("text deltas still reconstruct full content after thinking", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + const events = parseClaudeSSEEvents(res.body); + const textDeltas = events.filter( + (e) => + e.type === "content_block_delta" && (e.delta as { type: string })?.type === "text_delta", + ) as (SSEEvent & { delta: { text: string } })[]; + + const fullText = textDeltas.map((d) => d.delta.text).join(""); + expect(fullText).toBe("The answer is 42."); + }); + + it("no thinking blocks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "plain" }], + stream: true, + }); + + const events = parseClaudeSSEEvents(res.body); + const thinkingBlocks = events.filter( + (e) => + e.type === "content_block_start" && + (e.content_block as { type: string })?.type === "thinking", + ); + expect(thinkingBlocks).toHaveLength(0); + + const blockStarts = events.filter((e) => e.type === "content_block_start"); + expect(blockStarts).toHaveLength(1); + expect((blockStarts[0].content_block as { type: string }).type).toBe("text"); + }); +}); + +describe("POST /v1/messages (thinking blocks non-streaming)", () => { + it("includes thinking block in non-streaming response", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.type).toBe("message"); + expect(body.content).toHaveLength(2); + expect(body.content[0].type).toBe("thinking"); + expect(body.content[0].thinking).toBe("Let me think step by step about this problem."); + expect(body.content[1].type).toBe("text"); + expect(body.content[1].text).toBe("The answer is 42."); + }); + + it("no thinking block in non-streaming response when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.content).toHaveLength(1); + expect(body.content[0].type).toBe("text"); + }); +}); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts index 78a32b2..fc154c4 100644 --- a/src/__tests__/stream-collapse.test.ts +++ b/src/__tests__/stream-collapse.test.ts @@ -1684,3 +1684,111 @@ describe("collapseStreamingResponse bedrock SSE", () => { expect(result!.content).toBe("bedrock-sse"); }); }); + +// --------------------------------------------------------------------------- +// Reasoning and web search collapse +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE with reasoning", () => { + it("extracts reasoning from Responses API reasoning_summary_text.delta events", () => { + const body = [ + `data: ${JSON.stringify({ type: "response.created", response: {} })}`, + "", + `data: ${JSON.stringify({ type: "response.reasoning_summary_text.delta", delta: "Let me " })}`, + "", + `data: ${JSON.stringify({ type: "response.reasoning_summary_text.delta", delta: "think." })}`, + "", + `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Answer" })}`, + "", + `data: ${JSON.stringify({ type: "response.completed", response: {} })}`, + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Answer"); + expect(result.reasoning).toBe("Let me think."); + }); + + it("extracts web searches from Responses API output_item.done events", () => { + const body = [ + `data: ${JSON.stringify({ type: "response.created", response: {} })}`, + "", + `data: ${JSON.stringify({ + type: "response.output_item.done", + item: { type: "web_search_call", status: "completed", query: "test query" }, + })}`, + "", + `data: ${JSON.stringify({ + type: "response.output_item.done", + item: { type: "web_search_call", status: "completed", query: "another query" }, + })}`, + "", + `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Result" })}`, + "", + `data: ${JSON.stringify({ type: "response.completed", response: {} })}`, + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Result"); + expect(result.webSearches).toEqual(["test query", "another query"]); + }); + + it("returns undefined reasoning and webSearches when not present", () => { + const body = [ + `data: ${JSON.stringify({ type: "response.output_text.delta", delta: "Plain" })}`, + "", + `data: ${JSON.stringify({ type: "response.completed", response: {} })}`, + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Plain"); + expect(result.reasoning).toBeUndefined(); + expect(result.webSearches).toBeUndefined(); + }); +}); + +describe("collapseAnthropicSSE with thinking", () => { + it("extracts reasoning from thinking_delta events", () => { + const body = [ + `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "thinking" } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "thinking_delta", thinking: "Hmm " } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "thinking_delta", thinking: "interesting" } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`, + "", + `event: content_block_start\ndata: ${JSON.stringify({ index: 1, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 1, delta: { type: "text_delta", text: "Answer" } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 1 })}`, + "", + `event: message_stop\ndata: {}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Answer"); + expect(result.reasoning).toBe("Hmm interesting"); + }); + + it("returns undefined reasoning when no thinking blocks", () => { + const body = [ + `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "Plain" } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`, + "", + `event: message_stop\ndata: {}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Plain"); + expect(result.reasoning).toBeUndefined(); + }); +}); diff --git a/src/__tests__/ws-responses.test.ts b/src/__tests__/ws-responses.test.ts index ca9bfab..86c459a 100644 --- a/src/__tests__/ws-responses.test.ts +++ b/src/__tests__/ws-responses.test.ts @@ -25,7 +25,12 @@ const errorFixture: Fixture = { }, }; -const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture]; +const reasoningFixture: Fixture = { + match: { userMessage: "think" }, + response: { content: "The answer.", reasoning: "Let me reason about this." }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture, reasoningFixture]; // --- tests --- @@ -447,4 +452,22 @@ describe("WebSocket /v1/responses", () => { expect(entry!.response.interrupted).toBe(true); expect(entry!.response.interruptReason).toBe("disconnectAfterMs"); }); + + it("streams reasoning events before text via WebSocket", async () => { + instance = await createServer(allFixtures); + const ws = await connectWebSocket(instance.url, "/v1/responses"); + + ws.send(responseCreateMsg("think")); + + const raw = await ws.waitForMessages(15); + const events = parseEvents(raw); + const types = events.map((e) => e.type); + + expect(types).toContain("response.reasoning_summary_text.delta"); + expect(types).toContain("response.output_text.delta"); + + const reasoningIdx = types.indexOf("response.reasoning_summary_text.delta"); + const textIdx = types.indexOf("response.output_text.delta"); + expect(reasoningIdx).toBeLessThan(textIdx); + }); }); diff --git a/src/fixture-loader.ts b/src/fixture-loader.ts index 4230b78..1b35eb6 100644 --- a/src/fixture-loader.ts +++ b/src/fixture-loader.ts @@ -154,6 +154,54 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] { message: "content is empty string", }); } + if (response.reasoning !== undefined) { + if (typeof response.reasoning !== "string") { + results.push({ + severity: "error", + fixtureIndex: i, + message: "reasoning must be a string", + }); + } else if (response.reasoning === "") { + results.push({ + severity: "warning", + fixtureIndex: i, + message: "reasoning is empty string — no reasoning events will be emitted", + }); + } + } + if (response.webSearches !== undefined) { + if (!Array.isArray(response.webSearches)) { + results.push({ + severity: "error", + fixtureIndex: i, + message: "webSearches must be an array of strings", + }); + } else if (response.webSearches.length === 0) { + results.push({ + severity: "warning", + fixtureIndex: i, + message: "webSearches is empty array — no web search events will be emitted", + }); + } else { + for (let j = 0; j < response.webSearches.length; j++) { + if (typeof response.webSearches[j] !== "string") { + results.push({ + severity: "error", + fixtureIndex: i, + message: `webSearches[${j}] is not a string`, + }); + break; + } + if (response.webSearches[j] === "") { + results.push({ + severity: "warning", + fixtureIndex: i, + message: `webSearches[${j}] is empty string`, + }); + } + } + } + } } // Tool call response checks diff --git a/src/messages.ts b/src/messages.ts index 8d93e27..ee17efc 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -198,6 +198,7 @@ function buildClaudeTextStreamEvents( content: string, model: string, chunkSize: number, + reasoning?: string, ): ClaudeSSEEvent[] { const msgId = generateMessageId(); const events: ClaudeSSEEvent[] = []; @@ -217,10 +218,37 @@ function buildClaudeTextStreamEvents( }, }); - // content_block_start + let blockIndex = 0; + + // Thinking block (emitted before text when reasoning is present) + if (reasoning) { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { type: "thinking", thinking: "" }, + }); + + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { type: "thinking_delta", thinking: slice }, + }); + } + + events.push({ + type: "content_block_stop", + index: blockIndex, + }); + + blockIndex++; + } + + // content_block_start (text) events.push({ type: "content_block_start", - index: 0, + index: blockIndex, content_block: { type: "text", text: "" }, }); @@ -229,7 +257,7 @@ function buildClaudeTextStreamEvents( const slice = content.slice(i, i + chunkSize); events.push({ type: "content_block_delta", - index: 0, + index: blockIndex, delta: { type: "text_delta", text: slice }, }); } @@ -237,7 +265,7 @@ function buildClaudeTextStreamEvents( // content_block_stop events.push({ type: "content_block_stop", - index: 0, + index: blockIndex, }); // message_delta @@ -337,12 +365,20 @@ function buildClaudeToolCallStreamEvents( // Non-streaming response builders -function buildClaudeTextResponse(content: string, model: string): object { +function buildClaudeTextResponse(content: string, model: string, reasoning?: string): object { + const contentBlocks: object[] = []; + + if (reasoning) { + contentBlocks.push({ type: "thinking", thinking: reasoning }); + } + + contentBlocks.push({ type: "text", text: content }); + return { id: generateMessageId(), type: "message", role: "assistant", - content: [{ type: "text", text: content }], + content: contentBlocks, model, stop_reason: "end_turn", stop_sequence: null, @@ -569,6 +605,11 @@ export async function handleMessages( // Text response if (isTextResponse(response)) { + if (response.webSearches?.length) { + defaults.logger.warn( + "webSearches in fixture response are not supported for Claude Messages API — ignoring", + ); + } const journalEntry = journal.add({ method: req.method ?? "POST", path: req.url ?? "/v1/messages", @@ -577,11 +618,20 @@ export async function handleMessages( response: { status: 200, fixture }, }); if (claudeReq.stream !== true) { - const body = buildClaudeTextResponse(response.content, completionReq.model); + const body = buildClaudeTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const events = buildClaudeTextStreamEvents(response.content, completionReq.model, chunkSize); + const events = buildClaudeTextStreamEvents( + response.content, + completionReq.model, + chunkSize, + response.reasoning, + ); const interruption = createInterruptionSignal(fixture); const completed = await writeClaudeSSEStream(res, events, { latency, diff --git a/src/responses.ts b/src/responses.ts index 6d10735..f31f5dc 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -165,12 +165,17 @@ export function buildTextStreamEvents( content: string, model: string, chunkSize: number, + reasoning?: string, + webSearches?: string[], ): ResponsesSSEEvent[] { const respId = responseId(); const msgId = itemId(); const created = Math.floor(Date.now() / 1000); const events: ResponsesSSEEvent[] = []; + let msgOutputIndex = 0; + const prefixOutputItems: object[] = []; + // response.created events.push({ type: "response.created", @@ -197,10 +202,34 @@ export function buildTextStreamEvents( }, }); + if (reasoning) { + const reasoningEvents = buildReasoningStreamEvents(reasoning, model, chunkSize); + events.push(...reasoningEvents); + const doneEvent = reasoningEvents.find( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "reasoning", + ); + if (doneEvent) prefixOutputItems.push(doneEvent.item as object); + msgOutputIndex++; + } + + if (webSearches && webSearches.length > 0) { + const searchEvents = buildWebSearchStreamEvents(webSearches, msgOutputIndex); + events.push(...searchEvents); + const doneEvents = searchEvents.filter( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call", + ); + for (const de of doneEvents) prefixOutputItems.push(de.item as object); + msgOutputIndex += webSearches.length; + } + // output_item.added (message) events.push({ type: "response.output_item.added", - output_index: 0, + output_index: msgOutputIndex, item: { type: "message", id: msgId, @@ -213,7 +242,7 @@ export function buildTextStreamEvents( // content_part.added events.push({ type: "response.content_part.added", - output_index: 0, + output_index: msgOutputIndex, content_index: 0, part: { type: "output_text", text: "" }, }); @@ -224,7 +253,7 @@ export function buildTextStreamEvents( events.push({ type: "response.output_text.delta", item_id: msgId, - output_index: 0, + output_index: msgOutputIndex, content_index: 0, delta: slice, }); @@ -233,7 +262,7 @@ export function buildTextStreamEvents( // output_text.done events.push({ type: "response.output_text.done", - output_index: 0, + output_index: msgOutputIndex, content_index: 0, text: content, }); @@ -241,22 +270,24 @@ export function buildTextStreamEvents( // content_part.done events.push({ type: "response.content_part.done", - output_index: 0, + output_index: msgOutputIndex, content_index: 0, part: { type: "output_text", text: content }, }); + const msgItem = { + type: "message", + id: msgId, + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: content }], + }; + // output_item.done events.push({ type: "response.output_item.done", - output_index: 0, - item: { - type: "message", - id: msgId, - status: "completed", - role: "assistant", - content: [{ type: "output_text", text: content }], - }, + output_index: msgOutputIndex, + item: msgItem, }); // response.completed @@ -268,15 +299,7 @@ export function buildTextStreamEvents( created_at: created, model, status: "completed", - output: [ - { - type: "message", - id: msgId, - status: "completed", - role: "assistant", - content: [{ type: "output_text", text: content }], - }, - ], + output: [...prefixOutputItems, msgItem], usage: { input_tokens: 0, output_tokens: 0, @@ -402,26 +425,151 @@ export function buildToolCallStreamEvents( return events; } +function buildReasoningStreamEvents( + reasoning: string, + model: string, + chunkSize: number, +): ResponsesSSEEvent[] { + const reasoningId = generateId("rs"); + const events: ResponsesSSEEvent[] = []; + + events.push({ + type: "response.output_item.added", + output_index: 0, + item: { + type: "reasoning", + id: reasoningId, + summary: [], + }, + }); + + events.push({ + type: "response.reasoning_summary_part.added", + output_index: 0, + summary_index: 0, + part: { type: "summary_text", text: "" }, + }); + + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + events.push({ + type: "response.reasoning_summary_text.delta", + item_id: reasoningId, + output_index: 0, + summary_index: 0, + delta: slice, + }); + } + + events.push({ + type: "response.reasoning_summary_text.done", + output_index: 0, + summary_index: 0, + text: reasoning, + }); + + events.push({ + type: "response.reasoning_summary_part.done", + output_index: 0, + summary_index: 0, + part: { type: "summary_text", text: reasoning }, + }); + + events.push({ + type: "response.output_item.done", + output_index: 0, + item: { + type: "reasoning", + id: reasoningId, + summary: [{ type: "summary_text", text: reasoning }], + }, + }); + + return events; +} + +function buildWebSearchStreamEvents( + queries: string[], + startOutputIndex: number, +): ResponsesSSEEvent[] { + const events: ResponsesSSEEvent[] = []; + + for (let i = 0; i < queries.length; i++) { + const searchId = generateId("ws"); + const outputIndex = startOutputIndex + i; + + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item: { + type: "web_search_call", + id: searchId, + status: "in_progress", + query: queries[i], + }, + }); + + events.push({ + type: "response.output_item.done", + output_index: outputIndex, + item: { + type: "web_search_call", + id: searchId, + status: "completed", + query: queries[i], + }, + }); + } + + return events; +} + // Non-streaming response builders -function buildTextResponse(content: string, model: string): object { +function buildTextResponse( + content: string, + model: string, + reasoning?: string, + webSearches?: string[], +): object { const respId = responseId(); const msgId = itemId(); + const output: object[] = []; + + if (reasoning) { + output.push({ + type: "reasoning", + id: generateId("rs"), + summary: [{ type: "summary_text", text: reasoning }], + }); + } + + if (webSearches && webSearches.length > 0) { + for (const query of webSearches) { + output.push({ + type: "web_search_call", + id: generateId("ws"), + status: "completed", + query, + }); + } + } + + output.push({ + type: "message", + id: msgId, + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: content }], + }); + return { id: respId, object: "response", created_at: Math.floor(Date.now() / 1000), model, status: "completed", - output: [ - { - type: "message", - id: msgId, - status: "completed", - role: "assistant", - content: [{ type: "output_text", text: content }], - }, - ], + output, usage: { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, }; } @@ -633,11 +781,22 @@ export async function handleResponses( response: { status: 200, fixture }, }); if (responsesReq.stream !== true) { - const body = buildTextResponse(response.content, completionReq.model); + const body = buildTextResponse( + response.content, + completionReq.model, + response.reasoning, + response.webSearches, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const events = buildTextStreamEvents(response.content, completionReq.model, chunkSize); + const events = buildTextStreamEvents( + response.content, + completionReq.model, + chunkSize, + response.reasoning, + response.webSearches, + ); const interruption = createInterruptionSignal(fixture); const completed = await writeResponsesSSEStream(res, events, { latency, diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts index 73316e6..01c5d87 100644 --- a/src/stream-collapse.ts +++ b/src/stream-collapse.ts @@ -19,6 +19,8 @@ import type { Logger } from "./logger.js"; // ambiguous results and simplify downstream consumers. export interface CollapseResult { content?: string; + reasoning?: string; + webSearches?: string[]; toolCalls?: ToolCall[]; droppedChunks?: number; truncated?: boolean; @@ -38,6 +40,8 @@ export interface CollapseResult { export function collapseOpenAISSE(body: string): CollapseResult { const lines = body.split("\n\n").filter((l) => l.trim().length > 0); let content = ""; + let reasoning = ""; + const webSearchQueries: string[] = []; let droppedChunks = 0; const toolCallMap = new Map(); @@ -56,6 +60,35 @@ export function collapseOpenAISSE(body: string): CollapseResult { continue; } + // Responses API reasoning events + if ( + parsed.type === "response.reasoning_summary_text.delta" && + typeof parsed.delta === "string" + ) { + reasoning += parsed.delta; + continue; + } + + // Responses API web search events + if (parsed.type === "response.output_item.done") { + const item = parsed.item as Record | undefined; + if (item?.type === "web_search_call" && typeof item.query === "string") { + webSearchQueries.push(item.query); + continue; + } + } + + // Responses API text content events + if (parsed.type === "response.output_text.delta" && typeof parsed.delta === "string") { + content += parsed.delta; + continue; + } + + // Skip other Responses API structural events + if (typeof parsed.type === "string" && parsed.type.startsWith("response.")) { + continue; + } + const choices = parsed.choices as Array> | undefined; if (!choices || choices.length === 0) continue; @@ -108,7 +141,12 @@ export function collapseOpenAISSE(body: string): CollapseResult { }; } - return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; + return { + content, + ...(reasoning ? { reasoning } : {}), + ...(webSearchQueries.length > 0 ? { webSearches: webSearchQueries } : {}), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; } // --------------------------------------------------------------------------- @@ -125,6 +163,7 @@ export function collapseOpenAISSE(body: string): CollapseResult { export function collapseAnthropicSSE(body: string): CollapseResult { const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); let content = ""; + let reasoning = ""; let droppedChunks = 0; const toolCallMap = new Map(); @@ -166,6 +205,10 @@ export function collapseAnthropicSSE(body: string): CollapseResult { content += delta.text; } + if (delta.type === "thinking_delta" && typeof delta.thinking === "string") { + reasoning += delta.thinking; + } + if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") { const entry = toolCallMap.get(index); if (entry) { @@ -183,11 +226,16 @@ export function collapseAnthropicSSE(body: string): CollapseResult { arguments: tc.arguments, ...(tc.id ? { id: tc.id } : {}), })), + ...(reasoning ? { reasoning } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), }; } - return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; + return { + content, + ...(reasoning ? { reasoning } : {}), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; } // --------------------------------------------------------------------------- diff --git a/src/types.ts b/src/types.ts index 3bbae4d..2b64e4d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -60,6 +60,8 @@ export interface FixtureMatch { export interface TextResponse { content: string; + reasoning?: string; + webSearches?: string[]; role?: string; finishReason?: string; } diff --git a/src/ws-responses.ts b/src/ws-responses.ts index 60ab4b7..6b92c20 100644 --- a/src/ws-responses.ts +++ b/src/ws-responses.ts @@ -194,7 +194,14 @@ async function processMessage( body: completionReq, response: { status: 200, fixture }, }); - const events = buildTextStreamEvents(response.content, completionReq.model, chunkSize); + + const events = buildTextStreamEvents( + response.content, + completionReq.model, + chunkSize, + response.reasoning, + response.webSearches, + ); const interruption = createInterruptionSignal(fixture); const completed = await sendEvents( ws,