Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 271 additions & 0 deletions apps/gateway/src/responses.e2e.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
import "dotenv/config";
import { beforeAll, beforeEach, describe, expect, test } from "vitest";

import { app } from "@/app.js";
import {
beforeAllHook,
beforeEachHook,
generateTestRequestId,
getConcurrentTestOptions,
getTestOptions,
logMode,
testModels,
toolCallModels,
validateLogByRequestId,
} from "@/chat-helpers.e2e.js";

/**
 * Keeps only the first entry seen for each provider, preserving input order.
 * The provider is the portion of `model` before the first "/" (the whole
 * string when there is no "/"). Used to keep CI cost manageable while still
 * validating the Responses API conversion layer across every provider.
 */
function oneModelPerProvider<T extends { model: string }>(list: T[]): T[] {
  const claimedProviders = new Set<string>();
  return list.filter(({ model }) => {
    const slashAt = model.indexOf("/");
    const providerKey = slashAt === -1 ? model : model.slice(0, slashAt);
    if (claimedProviders.has(providerKey)) {
      return false;
    }
    claimedProviders.add(providerKey);
    return true;
  });
}

// Models left out of the tool-call round-trip test. Their provider adapter
// does not emit stable tool_call ids: the id returned in the first turn is not
// recognized when sent back as tool_call_id, so the second turn fails. That is
// a provider/adapter-level issue, unrelated to the Responses API conversion
// layer.
const TOOL_CALL_DENYLIST: ReadonlySet<string> = new Set([
  "bytedance/gpt-oss-120b",
]);

// One representative model per provider for the plain-text tests.
const responsesTestModels = oneModelPerProvider(testModels);

// Same reduction for the tool-capable models. The denylist is applied after
// the per-provider pick, so a denylisted pick is dropped rather than replaced
// by another model from the same provider.
const responsesToolCallModels = oneModelPerProvider(toolCallModels).filter(
  ({ model }) => !TOOL_CALL_DENYLIST.has(model),
);

/**
 * Loose shape of one entry in a Responses API `output` array, limited to the
 * fields the tests in this file read. Everything except `type` is optional.
 */
interface ResponsesOutputItem {
// Item discriminator; the helpers in this file look for "message" and "function_call".
type: string;
// NOTE(review): presumably the author role on message items — not read in this file, confirm.
role?: string;
// Content parts of a message item; parts typed "output_text" carry the text.
content?: { type: string; text?: string }[];
// Id of a function call; sent back as `call_id` on the function_call_output input item.
call_id?: string;
// Name of the function the model asked to call.
name?: string;
// Function-call arguments as a JSON-encoded string (the tool-call test runs JSON.parse on it).
arguments?: string;
}

/**
 * Concatenates the text of every `output_text` part found in the `message`
 * items of a Responses API payload, in output order and with no separator.
 *
 * @param json - Parsed response body; a missing `output` is treated as empty.
 * @returns The joined text, or "" when there is no text output.
 */
function getOutputText(json: { output?: ResponsesOutputItem[] }): string {
  let text = "";
  for (const entry of json.output ?? []) {
    // Only message items with an array `content` can contribute text.
    if (entry.type !== "message" || !Array.isArray(entry.content)) {
      continue;
    }
    for (const part of entry.content) {
      if (part.type === "output_text" && typeof part.text === "string") {
        text += part.text;
      }
    }
  }
  return text;
}

/**
 * Returns the first `function_call` item in a Responses API payload's
 * `output`, or `undefined` when there is none (or no `output` at all).
 */
function getFunctionCall(json: {
  output?: ResponsesOutputItem[];
}): ResponsesOutputItem | undefined {
  for (const entry of json.output ?? []) {
    if (entry.type === "function_call") {
      return entry;
    }
  }
  return undefined;
}

/**
 * Sends `body` as a JSON POST to `/v1/responses` via `app.request` and
 * resolves with the response.
 *
 * @param body - Request payload; serialized with `JSON.stringify`.
 * @param requestId - Value sent in the `x-request-id` header.
 */
async function postResponses(body: unknown, requestId: string) {
  const headers = {
    "Content-Type": "application/json",
    "x-request-id": requestId,
    "x-no-fallback": "true",
    Authorization: "Bearer real-token",
  };
  const response = await app.request("/v1/responses", {
    method: "POST",
    headers,
    body: JSON.stringify(body),
  });
  return response;
}

describe("e2e", getConcurrentTestOptions(), () => {
// Suite-level and per-test setup are delegated to the hooks imported from the
// shared chat e2e helpers module.
beforeAll(beforeAllHook);

beforeEach(beforeEachHook);

// Trivial always-passing test.
// NOTE(review): presumably keeps the suite non-empty when the model lists
// resolve to nothing — confirm.
test("empty", () => {
expect(true).toBe(true);
});

// Single-turn smoke test: a plain string `input` must come back as a 200 with
// a `resp_`-prefixed id, non-empty output text and positive token usage, and
// the request must then pass validateLogByRequestId.
test.each(responsesTestModels)(
  "responses single-turn $model",
  getTestOptions(),
  async ({ model }) => {
    const requestId = generateTestRequestId();
    const payload = { model, input: "Say hello in one short sentence." };
    const res = await postResponses(payload, requestId);
    const json = await res.json();

    if (logMode) {
      const pretty = JSON.stringify(json, null, 2);
      console.log("responses single-turn response:", pretty);
    }

    // Envelope: status, id shape, output array.
    expect(res.status).toBe(200);
    expect(json).toHaveProperty("id");
    expect(typeof json.id).toBe("string");
    expect(json.id.startsWith("resp_")).toBe(true);
    expect(Array.isArray(json.output)).toBe(true);

    // The model must have produced some text.
    expect(getOutputText(json).length).toBeGreaterThan(0);

    // Usage must be present with positive token counts on both sides.
    expect(json).toHaveProperty("usage");
    const { input_tokens: inputTokens, output_tokens: outputTokens } =
      json.usage;
    expect(typeof inputTokens).toBe("number");
    expect(typeof outputTokens).toBe("number");
    expect(inputTokens).toBeGreaterThan(0);
    expect(outputTokens).toBeGreaterThan(0);

    await validateLogByRequestId(requestId);
  },
);

// Multi-turn test: the first request states a name, the second references the
// first via `previous_response_id` and asks for that name back. Passing
// requires the second turn's output text to contain "ada".
test.each(responsesTestModels)(
  "responses multi-turn $model",
  getTestOptions(),
  async ({ model }) => {
    const firstRequestId = generateTestRequestId();
    const firstRes = await postResponses(
      {
        model,
        input:
          "My name is Ada. Please remember it. Reply with a brief acknowledgement.",
      },
      firstRequestId,
    );
    const firstJson = await firstRes.json();
    if (logMode) {
      console.log(
        "responses multi-turn first:",
        JSON.stringify(firstJson, null, 2),
      );
    }
    expect(firstRes.status).toBe(200);
    expect(typeof firstJson.id).toBe("string");
    // The id is replayed below as previous_response_id. A string-typed but
    // empty or malformed id would mean the second turn is not really
    // exercising the chaining path, so require the same `resp_` prefix the
    // single-turn test checks.
    expect(firstJson.id.startsWith("resp_")).toBe(true);

    const secondRequestId = generateTestRequestId();
    const secondRes = await postResponses(
      {
        model,
        input: "What is my name? Reply with just the name.",
        previous_response_id: firstJson.id,
      },
      secondRequestId,
    );
    const secondJson = await secondRes.json();
    if (logMode) {
      console.log(
        "responses multi-turn second:",
        JSON.stringify(secondJson, null, 2),
      );
    }
    expect(secondRes.status).toBe(200);
    // Case-insensitive match: models may answer "Ada", "ADA", "Ada.", etc.
    const text = getOutputText(secondJson);
    expect(text.toLowerCase()).toContain("ada");
  },
);

test.each(responsesToolCallModels)(
"responses tool calls $model",
getTestOptions(),
async ({ model }) => {
const tools = [
{
type: "function",
name: "get_weather",
description: "Get the current weather for a given city",
parameters: {
type: "object",
properties: {
city: {
type: "string",
description: "The city name to get weather for",
},
},
required: ["city"],
},
},
];

const firstRequestId = generateTestRequestId();
const firstRes = await postResponses(
{
model,
input: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools,
tool_choice: "required",
},
firstRequestId,
);
const firstJson = await firstRes.json();
if (logMode) {
console.log(
"responses tool calls first:",
JSON.stringify(firstJson, null, 2),
);
}

expect(firstRes.status).toBe(200);
// firstJson.id and fnCall.call_id are both replayed in the second turn, so
// require them to be real values rather than merely string-typed; otherwise
// the replay path would not actually be validated.
expect(typeof firstJson.id).toBe("string");
expect(firstJson.id.startsWith("resp_")).toBe(true);
const fnCall = getFunctionCall(firstJson);
expect(fnCall).toBeDefined();
expect(fnCall?.name).toBe("get_weather");
expect(typeof fnCall?.call_id).toBe("string");
expect(fnCall?.call_id).toBeTruthy();
expect(typeof fnCall?.arguments).toBe("string");
// `arguments` is a JSON-encoded string; the model must have filled in the city.
const parsedArgs = JSON.parse(fnCall?.arguments ?? "{}");
expect(typeof parsedArgs.city).toBe("string");
expect(parsedArgs.city.toLowerCase()).toContain("san francisco");

const secondRequestId = generateTestRequestId();
const secondRes = await postResponses(
{
model,
previous_response_id: firstJson.id,
input: [
{
type: "function_call_output",
call_id: fnCall?.call_id,
output: "72F and sunny",
Comment on lines +227 to +246
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Assert the replay IDs before sending turn two.

Right now this test only proves the identifiers are typed as strings. If firstJson.id or fnCall.call_id is empty/missing, the second request no longer validates the replay path as strongly as the test name suggests. Please assert both values are non-empty before using them.

🧪 Possible change
 			expect(firstRes.status).toBe(200);
+			expect(typeof firstJson.id).toBe("string");
+			expect(firstJson.id.startsWith("resp_")).toBe(true);
 			const fnCall = getFunctionCall(firstJson);
 			expect(fnCall).toBeDefined();
 			expect(fnCall?.name).toBe("get_weather");
 			expect(typeof fnCall?.call_id).toBe("string");
+			expect(fnCall?.call_id).toBeTruthy();
 			expect(typeof fnCall?.arguments).toBe("string");
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
expect(firstRes.status).toBe(200);
const fnCall = getFunctionCall(firstJson);
expect(fnCall).toBeDefined();
expect(fnCall?.name).toBe("get_weather");
expect(typeof fnCall?.call_id).toBe("string");
expect(typeof fnCall?.arguments).toBe("string");
const parsedArgs = JSON.parse(fnCall?.arguments ?? "{}");
expect(typeof parsedArgs.city).toBe("string");
expect(parsedArgs.city.toLowerCase()).toContain("san francisco");
const secondRequestId = generateTestRequestId();
const secondRes = await postResponses(
{
model,
previous_response_id: firstJson.id,
input: [
{
type: "function_call_output",
call_id: fnCall?.call_id,
output: "72F and sunny",
expect(firstRes.status).toBe(200);
expect(typeof firstJson.id).toBe("string");
expect(firstJson.id.startsWith("resp_")).toBe(true);
const fnCall = getFunctionCall(firstJson);
expect(fnCall).toBeDefined();
expect(fnCall?.name).toBe("get_weather");
expect(typeof fnCall?.call_id).toBe("string");
expect(fnCall?.call_id).toBeTruthy();
expect(typeof fnCall?.arguments).toBe("string");
const parsedArgs = JSON.parse(fnCall?.arguments ?? "{}");
expect(typeof parsedArgs.city).toBe("string");
expect(parsedArgs.city.toLowerCase()).toContain("san francisco");
const secondRequestId = generateTestRequestId();
const secondRes = await postResponses(
{
model,
previous_response_id: firstJson.id,
input: [
{
type: "function_call_output",
call_id: fnCall?.call_id,
output: "72F and sunny",
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/gateway/src/responses.e2e.ts` around lines 227 - 246, The test currently
only checks types but not that identifiers are present; before calling
generateTestRequestId() and postResponses(...) assert that firstJson.id and
fnCall.call_id are non-empty (e.g., use expect(firstJson.id).toBeTruthy() and
expect(fnCall?.call_id).toBeTruthy()) so the replay path is validated; add these
assertions just after obtaining fnCall from getFunctionCall(...) and before
using those values in the second request to ensure the test fails if the IDs are
missing.

},
],
tools,
},
secondRequestId,
);
const secondJson = await secondRes.json();
if (logMode) {
console.log(
"responses tool calls second:",
JSON.stringify(secondJson, null, 2),
);
}

expect(secondRes.status).toBe(200);
const finalText = getOutputText(secondJson).toLowerCase();
expect(finalText.length).toBeGreaterThan(0);
expect(
finalText.includes("sunny") ||
finalText.includes("72") ||
finalText.includes("weather"),
).toBe(true);
},
);
});
13 changes: 11 additions & 2 deletions apps/gateway/src/responses/tools/convert-chat-to-responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,17 @@ export function convertChatResponseToResponses(
}
}

// Add message output
if (message?.content !== null && message?.content !== undefined) {
// Add message output. Skip if content is empty/whitespace-only — many
// providers return content: "" alongside tool_calls, and emitting an empty
// message item pollutes stored conversations: on replay via
// previous_response_id it becomes a stray assistant message that separates
// the tool_calls assistant from its tool result, causing strict providers
// (deepseek, bytedance, aws-bedrock, kimi, etc.) to reject the request.
if (
message?.content !== null &&
message?.content !== undefined &&
message.content.trim() !== ""
) {
const contentParts: Array<Record<string, unknown>> = [
{
type: "output_text",
Expand Down
61 changes: 59 additions & 2 deletions apps/gateway/src/responses/tools/convert-responses-to-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,15 @@ export function convertResponsesInputToMessages(
while (i < input.length) {
const item = input[i]!;

// function_call items -> collect consecutive ones into assistant tool_calls
// function_call items -> collect consecutive ones into assistant tool_calls.
// Also fold any immediately-following assistant `message` items into the
// same assistant message: in the Responses API the tool_calls and the
// assistant text are emitted as separate output items, but in chat
// completions they belong on a single assistant message. Splitting them
// inserts a stray assistant message between the tool_calls and the tool
// result, which strict providers (deepseek family, bytedance, etc.)
// reject with "assistant message with tool_calls must be followed by
// tool messages".
if ("type" in item && item.type === "function_call") {
const toolCalls: ChatMessage["tool_calls"] = [];

Expand All @@ -58,9 +66,29 @@ export function convertResponsesInputToMessages(
i++;
}

// Fold trailing assistant message content (if any) into this same
// assistant message rather than emitting it as a separate message.
let foldedContent: string | null = null;
while (i < input.length) {
const next = input[i] as Record<string, unknown> | undefined;
if (
next &&
next.type === "message" &&
(next.role === "assistant" || next.role === undefined)
) {
const text = extractTextFromContent(next.content);
if (text) {
foldedContent = (foldedContent ?? "") + text;
}
i++;
continue;
}
break;
}

messages.push({
role: "assistant",
content: null,
content: foldedContent,
tool_calls: toolCalls,
});
continue;
Expand Down Expand Up @@ -130,6 +158,35 @@ export function convertResponsesInputToMessages(
return messages;
}

/**
 * Flattens a Responses API message `content` value into plain text.
 *
 * A string is returned unchanged. For an array, the `text` of every object
 * part that has a string `text` property is concatenated in order with no
 * separator. Any other value (null, undefined, non-array objects, numbers)
 * yields "". Used when folding a trailing assistant text message into a
 * tool_calls assistant message.
 */
function extractTextFromContent(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  return content
    .filter(
      (part): part is { text: string } =>
        typeof part === "object" &&
        part !== null &&
        typeof (part as { text?: unknown }).text === "string",
    )
    .map((part) => part.text)
    .join("");
}

/**
* Convert Responses API content types to chat completions content types.
* input_text/output_text -> text, input_image -> image_url
Expand Down
Loading