Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 67 additions & 37 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ import {
selectNextProvider,
shouldRetryRequest,
} from "./tools/retry-with-fallback.js";
import { serializeStreamingChunk } from "./tools/serialize-streaming-chunk.js";
import {
encodeChatMessages,
messageContentToString,
Expand Down Expand Up @@ -2296,16 +2297,21 @@ chat.openapi(completions, async (c) => {
let firstReasoningTokenReceived = false;

// Helper function to write SSE and capture for cache
// Large payload threshold: skip debug string copies for payloads > 64KB
// to avoid multi-MB string allocations in the hot path
const LARGE_SSE_THRESHOLD = 65536;
const writeSSEAndCache = async (sseData: {
data: string;
event?: string;
id?: string;
}) => {
await stream.writeSSE(sseData);

// Collect raw response data for logging only in debug mode and within size limit
// Collect raw response data for logging only in debug mode and within size limit.
// Skip large payloads (e.g. base64 image data) to avoid multi-MB string copies.
if (
debugMode &&
sseData.data.length < LARGE_SSE_THRESHOLD &&
streamingRawResponseData.length < MAX_RAW_DATA_SIZE
) {
const sseString = `${sseData.event ? `event: ${sseData.event}\n` : ""}data: ${sseData.data}${sseData.id ? `\nid: ${sseData.id}` : ""}\n\n`;
Expand Down Expand Up @@ -3332,8 +3338,13 @@ chat.openapi(completions, async (c) => {
}

buffer += chunk;
// Collect raw upstream data for logging only in debug mode and within size limit
if (debugMode && rawUpstreamData.length < MAX_RAW_DATA_SIZE) {
// Collect raw upstream data for logging only in debug mode and within size limit.
// Skip large chunks (e.g. base64 image data) to avoid multi-MB string copies.
if (
debugMode &&
chunk.length < LARGE_SSE_THRESHOLD &&
rawUpstreamData.length < MAX_RAW_DATA_SIZE
) {
rawUpstreamData += chunk;
}

Expand Down Expand Up @@ -3436,10 +3447,10 @@ chat.openapi(completions, async (c) => {
const firstNewline = betweenEvents.indexOf("\n");

if (firstNewline !== -1) {
// Check if JSON up to first newline is valid
const jsonCandidate = betweenEvents
.slice(0, firstNewline)
.trim();
// Check if JSON up to first newline is valid.
// Skip .trim() — mightBeCompleteJson handles whitespace
// internally without allocating a copy.
const jsonCandidate = betweenEvents.slice(0, firstNewline);
// Quick heuristic check before expensive JSON.parse
let isValidJson = false;
if (mightBeCompleteJson(jsonCandidate)) {
Expand Down Expand Up @@ -3470,10 +3481,13 @@ chat.openapi(completions, async (c) => {
// Try to find the end of the JSON data by looking for the closing brace
const newlinePos = bufferCopy.indexOf("\n", eventStartPos);
if (newlinePos !== -1) {
// We found a newline - check if the JSON before it is valid
const jsonCandidate = bufferCopy
.slice(eventStartPos, newlinePos)
.trim();
// We found a newline - check if the JSON before it is valid.
// Skip .trim() — mightBeCompleteJson handles whitespace
// internally without allocating a copy.
const jsonCandidate = bufferCopy.slice(
eventStartPos,
newlinePos,
);
// Quick heuristic check before expensive JSON.parse
let isValidJson = false;
if (mightBeCompleteJson(jsonCandidate)) {
Expand Down Expand Up @@ -3543,11 +3557,12 @@ chat.openapi(completions, async (c) => {
// Try to detect if we have a complete JSON object
const eventDataCandidate = bufferCopy.slice(eventStartPos);
if (eventDataCandidate.length > 0) {
// Quick heuristic check before expensive JSON.parse
const trimmedCandidate = eventDataCandidate.trim();
if (mightBeCompleteJson(trimmedCandidate)) {
// Quick heuristic check before expensive JSON.parse.
// mightBeCompleteJson handles its own whitespace scanning
// without allocating a trimmed copy.
if (mightBeCompleteJson(eventDataCandidate)) {
try {
JSON.parse(trimmedCandidate);
JSON.parse(eventDataCandidate);
// If we can parse it, it's complete
eventEnd = bufferCopy.length;
} catch {
Expand All @@ -3565,9 +3580,15 @@ chat.openapi(completions, async (c) => {
}
}

const eventData = bufferCopy
.slice(dataIndex + 6, eventEnd)
.trim();
// For small payloads, trim whitespace normally.
// For large payloads (>64KB, e.g. base64 image data), skip .trim()
// to avoid allocating a second multi-MB string copy.
// JSON.parse handles leading/trailing whitespace fine.
const rawEventData = bufferCopy.slice(dataIndex + 6, eventEnd);
const eventData =
rawEventData.length < LARGE_SSE_THRESHOLD
? rawEventData.trim()
: rawEventData;

// Debug logging for troublesome events
// Only scan for SSE field contamination on small events to avoid
Expand Down Expand Up @@ -3851,13 +3872,18 @@ chat.openapi(completions, async (c) => {
}
}

// For Google providers, add usage information when available
// For Google providers, extract usage early so we can both
// add it to the streaming chunk and reuse it later for tracking
// (avoiding a redundant extractTokenUsage call).
let googleUsageResult: ReturnType<
typeof extractTokenUsage
> | null = null;
if (
usedProvider === "google-ai-studio" ||
usedProvider === "google-vertex" ||
usedProvider === "obsidian"
) {
const usage = extractTokenUsage(
googleUsageResult = extractTokenUsage(
data,
usedProvider,
fullContent,
Expand All @@ -3866,16 +3892,17 @@ chat.openapi(completions, async (c) => {

// If we have usage data from Google, add it to the streaming chunk
if (
usage.promptTokens !== null ||
usage.completionTokens !== null ||
usage.totalTokens !== null
googleUsageResult.promptTokens !== null ||
googleUsageResult.completionTokens !== null ||
googleUsageResult.totalTokens !== null
) {
transformedData.usage = {
prompt_tokens: usage.promptTokens ?? 0,
completion_tokens: usage.completionTokens ?? 0,
total_tokens: usage.totalTokens ?? 0,
...(usage.reasoningTokens !== null && {
reasoning_tokens: usage.reasoningTokens,
prompt_tokens: googleUsageResult.promptTokens ?? 0,
completion_tokens:
googleUsageResult.completionTokens ?? 0,
total_tokens: googleUsageResult.totalTokens ?? 0,
...(googleUsageResult.reasoningTokens !== null && {
reasoning_tokens: googleUsageResult.reasoningTokens,
}),
};
}
Expand Down Expand Up @@ -3959,7 +3986,7 @@ chat.openapi(completions, async (c) => {

// Create a copy without content in delta for streaming
const chunkWithoutContent = JSON.parse(
JSON.stringify(transformedData),
serializeStreamingChunk(transformedData),
);
if (chunkWithoutContent.choices?.[0]?.delta?.content) {
delete chunkWithoutContent.choices[0].delta.content;
Expand All @@ -3982,7 +4009,7 @@ chat.openapi(completions, async (c) => {
}
} else {
await writeSSEAndCache({
data: JSON.stringify(transformedData),
data: serializeStreamingChunk(transformedData),
id: String(eventId++),
});
}
Expand Down Expand Up @@ -4195,13 +4222,16 @@ chat.openapi(completions, async (c) => {
break;
}

// Extract token usage using helper function
const usage = extractTokenUsage(
data,
usedProvider,
fullContent,
imageByteSize,
);
// Extract token usage using helper function.
// Reuse the result from earlier Google-specific extraction if available.
const usage =
googleUsageResult ??
extractTokenUsage(
data,
usedProvider,
fullContent,
imageByteSize,
);
if (usage.promptTokens !== null) {
promptTokens = usage.promptTokens;
}
Expand Down
27 changes: 23 additions & 4 deletions apps/gateway/src/chat/tools/extract-images.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import type { Provider } from "@llmgateway/models";
/**
* Extracts images from streaming data based on provider format.
*
* For large base64 image data, we reference the original inlineData fields
* directly rather than creating new concatenated strings, to avoid unnecessary
* multi-MB string copies.
* For large base64 image data, we store mimeType and data separately
* to avoid creating concatenated multi-MB URL strings. The URL is
* constructed lazily only when needed (e.g. for non-streaming responses).
*/
export function extractImages(data: any, provider: Provider): ImageObject[] {
switch (provider) {
Expand All @@ -19,7 +19,12 @@ export function extractImages(data: any, provider: Provider): ImageObject[] {
(part: any): ImageObject => ({
type: "image_url",
image_url: {
url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`,
// Store references to avoid multi-MB string concatenation.
// The _mime and _base64 fields allow serialization without
// creating an intermediate concatenated URL string.
url: "",
_mime: part.inlineData.mimeType,
_base64: part.inlineData.data,
},
}),
);
Expand All @@ -28,3 +33,17 @@ export function extractImages(data: any, provider: Provider): ImageObject[] {
return [];
}
}

/**
* Serializes an image object to a JSON string fragment without creating
* an intermediate concatenated data URL. This avoids allocating multi-MB
* strings just to immediately re-serialize them.
*/
export function serializeImageObject(img: ImageObject): string {
const imgUrl = img.image_url as any;
if (imgUrl._mime && imgUrl._base64) {
// Build the JSON directly, embedding the base64 data in place
return `{"type":"image_url","image_url":{"url":"data:${imgUrl._mime};base64,${imgUrl._base64}"}}`;
}
return JSON.stringify(img);
Comment on lines +42 to +48
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

cat -n apps/gateway/src/chat/tools/extract-images.ts | head -60

Repository: theopenco/llmgateway

Length of output: 2100


🏁 Script executed:

rg -n "type ImageObject|interface ImageObject" apps/gateway/src/chat/tools/extract-images.ts

Repository: theopenco/llmgateway

Length of output: 46


🏁 Script executed:

rg -n "type ImageObject|interface ImageObject" --type ts --type tsx

Repository: theopenco/llmgateway

Length of output: 91


🏁 Script executed:

cat -n apps/gateway/src/chat/tools/types.ts 2>/dev/null || cat -n apps/gateway/src/chat/tools/types.js

Repository: theopenco/llmgateway

Length of output: 1626


🏁 Script executed:

find apps/gateway/src/chat/tools -name "types.*" -type f

Repository: theopenco/llmgateway

Length of output: 100


Remove the unnecessary `as any` cast and escape the values interpolated into the hand-built JSON string to prevent injection.

The `ImageObject` type already includes `_mime` and `_base64` as optional properties on `image_url`, making the cast redundant. Additionally, directly injecting unescaped string values into a JSON template literal can break the output if a value contains quotes or backslashes.

🔧 Suggested fix
 export function serializeImageObject(img: ImageObject): string {
-	const imgUrl = img.image_url as any;
-	if (imgUrl._mime && imgUrl._base64) {
+	const { _mime, _base64 } = img.image_url;
+	if (_mime && _base64) {
+		const safeMime = _mime.replace(/[\\"]/g, "\\$&");
 		// Build the JSON directly, embedding the base64 data in place
-		return `{"type":"image_url","image_url":{"url":"data:${imgUrl._mime};base64,${imgUrl._base64}"}}`;
+		return `{"type":"image_url","image_url":{"url":"data:${safeMime};base64,${_base64}"}}`;
 	}
 	return JSON.stringify(img);
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/gateway/src/chat/tools/extract-images.ts` around lines 42 - 48, In
serializeImageObject, remove the unnecessary `as any` cast on img.image_url and
avoid manual string interpolation for JSON; instead build a plain object
containing the data URL (compose it from img.image_url._mime and
img.image_url._base64) and use JSON.stringify to produce the final string so
values are properly escaped and no injection can occur (refer to
serializeImageObject and img.image_url/_mime/_base64).

}
Comment on lines +37 to +49
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exported function `serializeImageObject` is never used in the codebase. The image serialization for streaming is handled by `serializeStreamingChunk` in `serialize-streaming-chunk.ts` instead. Consider removing this unused export to reduce the maintenance burden, or document whether it is intended for future use or external consumption.

Suggested change
/**
 * Matches any character outside the standard and URL-safe base64 alphabets.
 * Every character inside those alphabets can be embedded in a JSON string
 * literal without escaping.
 */
const NON_BASE64_CHAR = /[^A-Za-z0-9+\/=_-]/;

/**
 * Serializes an image object to a JSON string fragment.
 *
 * When the image carries separate `_mime` and `_base64` fields, the result is
 * `{"type":"image_url","image_url":{"url":"data:<mime>;base64,<data>"}}`.
 * On the fast path the base64 payload is embedded in place, so no intermediate
 * concatenated data-URL string is allocated before serialization.
 *
 * The MIME type is always JSON-escaped, and the payload is only embedded raw
 * after confirming it contains nothing but base64 characters. Anything else
 * takes the slow path through `JSON.stringify`, so the output is always valid
 * JSON and a hostile value cannot inject additional keys.
 *
 * @param img - Image object; `image_url._mime` / `image_url._base64` are optional.
 * @returns A JSON string. Images without both `_mime` and `_base64` are
 * serialized unchanged with `JSON.stringify`.
 */
export function serializeImageObject(img: ImageObject): string {
	const { _mime, _base64 } = img.image_url;
	if (!_mime || !_base64) {
		return JSON.stringify(img);
	}

	if (NON_BASE64_CHAR.test(_base64)) {
		// Malformed payload: correctness beats the allocation saving, so let
		// JSON.stringify escape the whole URL.
		return JSON.stringify({
			type: "image_url",
			image_url: { url: `data:${_mime};base64,${_base64}` },
		});
	}

	// Escape only the short prefix; drop its closing quote so the (already
	// JSON-safe) base64 payload can be appended inside the same string literal.
	const quotedPrefix = JSON.stringify(`data:${_mime};base64,`);
	return `{"type":"image_url","image_url":{"url":${quotedPrefix.slice(0, -1)}${_base64}"}}`;
}

Copilot uses AI. Check for mistakes.
15 changes: 15 additions & 0 deletions apps/gateway/src/chat/tools/might-be-complete-json.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ describe("mightBeCompleteJson", () => {
expect(mightBeCompleteJson("[1,2]]")).toBe(false);
});

it("handles leading and trailing whitespace without .trim() copy", () => {
expect(mightBeCompleteJson(' {"a":1} ')).toBe(true);
expect(mightBeCompleteJson('\n{"a":1}\n')).toBe(true);
expect(mightBeCompleteJson("\t[1,2]\t")).toBe(true);
expect(mightBeCompleteJson(' \n\t{"a":1}\n ')).toBe(true);
expect(mightBeCompleteJson(" ")).toBe(false);
expect(mightBeCompleteJson("\n\n")).toBe(false);
});

// Tests for large payload optimization (>100KB threshold)
describe("large payloads (>100KB)", () => {
const LARGE_SIZE = 120 * 1024; // 120KB to exceed the 100KB threshold
Expand Down Expand Up @@ -119,6 +128,12 @@ describe("mightBeCompleteJson", () => {
expect(mightBeCompleteJson(json)).toBe(false);
});

it("handles large payload with surrounding whitespace", () => {
const base64Data = "A".repeat(LARGE_SIZE);
const json = ` \n{"data":"${base64Data}"}\n `;
expect(mightBeCompleteJson(json)).toBe(true);
});

it("handles large payload performance efficiently", () => {
// 5MB base64 data simulating a real image
const base64Data = "A".repeat(5 * 1024 * 1024);
Expand Down
Loading
Loading