theopenco · steebchen · Feb 20, 2026 · Mar 18, 2026 · coderabbitai · Feb 20, 2026
diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts
@@ -100,6 +100,7 @@ import {
 	selectNextProvider,
 	shouldRetryRequest,
 } from "./tools/retry-with-fallback.js";
+import { serializeStreamingChunk } from "./tools/serialize-streaming-chunk.js";
 import {
 	encodeChatMessages,
 	messageContentToString,
@@ -2296,16 +2297,21 @@ chat.openapi(completions, async (c) => {
 				let firstReasoningTokenReceived = false;
 
 				// Helper function to write SSE and capture for cache
+				// Large payload threshold: skip debug string copies for payloads of 65536 or more
+				// characters (~64KB) to avoid multi-MB string allocations in the hot path
+				const LARGE_SSE_THRESHOLD = 65536;
 				const writeSSEAndCache = async (sseData: {
 					data: string;
 					event?: string;
 					id?: string;
 				}) => {
 					await stream.writeSSE(sseData);
 
-					// Collect raw response data for logging only in debug mode and within size limit
+					// Collect raw response data for logging only in debug mode and within size limit.
+					// Skip large payloads (e.g. base64 image data) to avoid multi-MB string copies.
 					if (
 						debugMode &&
+						sseData.data.length < LARGE_SSE_THRESHOLD &&
 						streamingRawResponseData.length < MAX_RAW_DATA_SIZE
 					) {
 						const sseString = `${sseData.event ? `event: ${sseData.event}\n` : ""}data: ${sseData.data}${sseData.id ? `\nid: ${sseData.id}` : ""}\n\n`;
@@ -3332,8 +3338,13 @@ chat.openapi(completions, async (c) => {
 						}
 
 						buffer += chunk;
-						// Collect raw upstream data for logging only in debug mode and within size limit
-						if (debugMode && rawUpstreamData.length < MAX_RAW_DATA_SIZE) {
+						// Collect raw upstream data for logging only in debug mode and within size limit.
+						// Skip large chunks (e.g. base64 image data) to avoid multi-MB string copies.
+						if (
+							debugMode &&
+							chunk.length < LARGE_SSE_THRESHOLD &&
+							rawUpstreamData.length < MAX_RAW_DATA_SIZE
+						) {
 							rawUpstreamData += chunk;
 						}
 
@@ -3436,10 +3447,10 @@ chat.openapi(completions, async (c) => {
 								const firstNewline = betweenEvents.indexOf("\n");
 
 								if (firstNewline !== -1) {
-									// Check if JSON up to first newline is valid
-									const jsonCandidate = betweenEvents
-										.slice(0, firstNewline)
-										.trim();
+									// Check if JSON up to first newline is valid.
+									// Skip .trim() — mightBeCompleteJson handles whitespace
+									// internally without allocating a copy.
+									const jsonCandidate = betweenEvents.slice(0, firstNewline);
 									// Quick heuristic check before expensive JSON.parse
 									let isValidJson = false;
 									if (mightBeCompleteJson(jsonCandidate)) {
@@ -3470,10 +3481,13 @@ chat.openapi(completions, async (c) => {
 								// Try to find the end of the JSON data by looking for the closing brace
 								const newlinePos = bufferCopy.indexOf("\n", eventStartPos);
 								if (newlinePos !== -1) {
-									// We found a newline - check if the JSON before it is valid
-									const jsonCandidate = bufferCopy
-										.slice(eventStartPos, newlinePos)
-										.trim();
+									// We found a newline - check if the JSON before it is valid.
+									// Skip .trim() — mightBeCompleteJson handles whitespace
+									// internally without allocating a copy.
+									const jsonCandidate = bufferCopy.slice(
+										eventStartPos,
+										newlinePos,
+									);
 									// Quick heuristic check before expensive JSON.parse
 									let isValidJson = false;
 									if (mightBeCompleteJson(jsonCandidate)) {
@@ -3543,11 +3557,12 @@ chat.openapi(completions, async (c) => {
 									// Try to detect if we have a complete JSON object
 									const eventDataCandidate = bufferCopy.slice(eventStartPos);
 									if (eventDataCandidate.length > 0) {
-										// Quick heuristic check before expensive JSON.parse
-										const trimmedCandidate = eventDataCandidate.trim();
-										if (mightBeCompleteJson(trimmedCandidate)) {
+										// Quick heuristic check before expensive JSON.parse.
+										// mightBeCompleteJson handles its own whitespace scanning
+										// without allocating a trimmed copy.
+										if (mightBeCompleteJson(eventDataCandidate)) {
 											try {
-												JSON.parse(trimmedCandidate);
+												JSON.parse(eventDataCandidate);
 												// If we can parse it, it's complete
 												eventEnd = bufferCopy.length;
 											} catch {
@@ -3565,9 +3580,15 @@ chat.openapi(completions, async (c) => {
 								}
 							}
 
-							const eventData = bufferCopy
-								.slice(dataIndex + 6, eventEnd)
-								.trim();
+							// For small payloads, trim whitespace normally.
+							// For large payloads (>= ~64KB, e.g. base64 image data), skip .trim()
+							// to avoid allocating a second multi-MB string copy.
+							// JSON.parse handles leading/trailing whitespace fine.
+							const rawEventData = bufferCopy.slice(dataIndex + 6, eventEnd);
+							const eventData =
+								rawEventData.length < LARGE_SSE_THRESHOLD
+									? rawEventData.trim()
+									: rawEventData;
 
 							// Debug logging for troublesome events
 							// Only scan for SSE field contamination on small events to avoid
@@ -3851,13 +3872,18 @@ chat.openapi(completions, async (c) => {
 									}
 								}
 
-								// For Google providers, add usage information when available
+								// For Google providers, extract usage early so we can both
+								// add it to the streaming chunk and reuse it later for tracking
+								// (avoiding a redundant extractTokenUsage call).
+								let googleUsageResult: ReturnType<
+									typeof extractTokenUsage
+								> | null = null;
 								if (
 									usedProvider === "google-ai-studio" ||
 									usedProvider === "google-vertex" ||
 									usedProvider === "obsidian"
 								) {
-									const usage = extractTokenUsage(
+									googleUsageResult = extractTokenUsage(
 										data,
 										usedProvider,
 										fullContent,
@@ -3866,16 +3892,17 @@ chat.openapi(completions, async (c) => {
 
 									// If we have usage data from Google, add it to the streaming chunk
 									if (
-										usage.promptTokens !== null ||
-										usage.completionTokens !== null ||
-										usage.totalTokens !== null
+										googleUsageResult.promptTokens !== null ||
+										googleUsageResult.completionTokens !== null ||
+										googleUsageResult.totalTokens !== null
 									) {
 										transformedData.usage = {
-											prompt_tokens: usage.promptTokens ?? 0,
-											completion_tokens: usage.completionTokens ?? 0,
-											total_tokens: usage.totalTokens ?? 0,
-											...(usage.reasoningTokens !== null && {
-												reasoning_tokens: usage.reasoningTokens,
+											prompt_tokens: googleUsageResult.promptTokens ?? 0,
+											completion_tokens:
+												googleUsageResult.completionTokens ?? 0,
+											total_tokens: googleUsageResult.totalTokens ?? 0,
+											...(googleUsageResult.reasoningTokens !== null && {
+												reasoning_tokens: googleUsageResult.reasoningTokens,
 											}),
 										};
 									}
@@ -3959,7 +3986,7 @@ chat.openapi(completions, async (c) => {
 
 									// Create a copy without content in delta for streaming
 									const chunkWithoutContent = JSON.parse(
-										JSON.stringify(transformedData),
+										serializeStreamingChunk(transformedData),
 									);
 									if (chunkWithoutContent.choices?.[0]?.delta?.content) {
 										delete chunkWithoutContent.choices[0].delta.content;
@@ -3982,7 +4009,7 @@ chat.openapi(completions, async (c) => {
 									}
 								} else {
 									await writeSSEAndCache({
-										data: JSON.stringify(transformedData),
+										data: serializeStreamingChunk(transformedData),
 										id: String(eventId++),
 									});
 								}
@@ -4195,13 +4222,16 @@ chat.openapi(completions, async (c) => {
 										break;
 								}
 
-								// Extract token usage using helper function
-								const usage = extractTokenUsage(
-									data,
-									usedProvider,
-									fullContent,
-									imageByteSize,
-								);
+								// Extract token usage using helper function.
+								// Reuse the result from earlier Google-specific extraction if available.
+								const usage =
+									googleUsageResult ??
+									extractTokenUsage(
+										data,
+										usedProvider,
+										fullContent,
+										imageByteSize,
+									);
 								if (usage.promptTokens !== null) {
 									promptTokens = usage.promptTokens;
 								}

diff --git a/apps/gateway/src/chat/tools/extract-images.ts b/apps/gateway/src/chat/tools/extract-images.ts
@@ -4,9 +4,9 @@ import type { Provider } from "@llmgateway/models";
 /**
  * Extracts images from streaming data based on provider format.
  *
- * For large base64 image data, we reference the original inlineData fields
- * directly rather than creating new concatenated strings, to avoid unnecessary
- * multi-MB string copies.
+ * For large base64 image data, we store mimeType and data separately
+ * to avoid creating concatenated multi-MB URL strings. The URL is
+ * constructed lazily only when needed (e.g. for non-streaming responses).
  */
 export function extractImages(data: any, provider: Provider): ImageObject[] {
 	switch (provider) {
@@ -19,7 +19,12 @@ export function extractImages(data: any, provider: Provider): ImageObject[] {
 				(part: any): ImageObject => ({
 					type: "image_url",
 					image_url: {
-						url: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`,
+						// Store references to avoid multi-MB string concatenation.
+						// The _mime and _base64 fields allow serialization without
+						// creating an intermediate concatenated URL string.
+						url: "",
+						_mime: part.inlineData.mimeType,
+						_base64: part.inlineData.data,
 					},
 				}),
 			);
@@ -28,3 +33,17 @@ export function extractImages(data: any, provider: Provider): ImageObject[] {
 			return [];
 	}
 }
+
+/**
+ * Serializes an image object to JSON without first building the data URL (avoids a multi-MB copy).
+ * The MIME type is JSON-escaped; the base64 payload is embedded verbatim since its alphabet needs
+ * no escaping. Images lacking `_mime`/`_base64` fall back to `JSON.stringify(img)`.
+ */
+export function serializeImageObject(img: ImageObject): string {
+	const imgUrl = img.image_url as any;
+	if (imgUrl._mime && imgUrl._base64) {
+		// Escape only the small MIME string so quotes/backslashes cannot break the hand-built JSON.
+		return `{"type":"image_url","image_url":{"url":"data:${JSON.stringify(String(imgUrl._mime)).slice(1, -1)};base64,${imgUrl._base64}"}}`;
+	}
+	return JSON.stringify(img);
+}
-/**
- * Serializes an image object to a JSON string fragment without creating
- * an intermediate concatenated data URL. This avoids allocating multi-MB
- * strings just to immediately re-serialize them.
- */
-export function serializeImageObject(img: ImageObject): string {
-	const imgUrl = img.image_url as any;
-	if (imgUrl._mime && imgUrl._base64) {
-		// Build the JSON directly, embedding the base64 data in place
-		return `{"type":"image_url","image_url":{"url":"data:${imgUrl._mime};base64,${imgUrl._base64}"}}`;
-	}
-	return JSON.stringify(img);
-}
-/**
- * Serializes an image object to a JSON string fragment without creating
- * an intermediate concatenated data URL. This avoids allocating multi-MB
- * strings just to immediately re-serialize them.
- */
-export function serializeImageObject(img: ImageObject): string {
-	const imgUrl = img.image_url as any;
-	if (imgUrl._mime && imgUrl._base64) {
-		// Build the JSON directly, embedding the base64 data in place
-		return `{"type":"image_url","image_url":{"url":"data:${imgUrl._mime};base64,${imgUrl._base64}"}}`;
-	}
-	return JSON.stringify(img);
-}
diff --git a/apps/gateway/src/chat/tools/might-be-complete-json.spec.ts b/apps/gateway/src/chat/tools/might-be-complete-json.spec.ts
@@ -76,6 +76,15 @@ describe("mightBeCompleteJson", () => {
 		expect(mightBeCompleteJson("[1,2]]")).toBe(false);
 	});
 
+	it("handles leading and trailing whitespace without .trim() copy", () => {
+		// Table of [input, expected]: padded JSON is complete, whitespace-only input is not.
+		const cases: Array<[string, boolean]> = [
+			['  {"a":1}  ', true], ['\n{"a":1}\n', true], ["\t[1,2]\t", true],
+			['  \n\t{"a":1}\n  ', true], ["   ", false], ["\n\n", false],
+		];
+		cases.forEach(([input, expected]) => expect(mightBeCompleteJson(input)).toBe(expected));
+	});
+
 	// Tests for large payload optimization (>100KB threshold)
 	describe("large payloads (>100KB)", () => {
 		const LARGE_SIZE = 120 * 1024; // 120KB to exceed the 100KB threshold
@@ -119,6 +128,12 @@ describe("mightBeCompleteJson", () => {
 			expect(mightBeCompleteJson(json)).toBe(false);
 		});
 
+		it("handles large payload with surrounding whitespace", () => {
+			// Large string value (LARGE_SIZE chars) wrapped in leading/trailing whitespace.
+			const padded = ["  \n", '{"data":"', "A".repeat(LARGE_SIZE), '"}', "\n  "].join("");
+			expect(mightBeCompleteJson(padded)).toBe(true);
+		});
+
 		it("handles large payload performance efficiently", () => {
 			// 5MB base64 data simulating a real image
 			const base64Data = "A".repeat(5 * 1024 * 1024);