jakobhoeg · jakobhoeg · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/.changeset/happy-badgers-create.md b/.changeset/happy-badgers-create.md
@@ -0,0 +1,5 @@
+---
+"@browser-ai/core": patch
+---
+
+perf: reduce prompt traversals and optimize base64 conversion
diff --git a/packages/vercel/core/src/chat/browser-ai-language-model.ts b/packages/vercel/core/src/chat/browser-ai-language-model.ts
@@ -21,7 +21,7 @@ import {
 } from "@browser-ai/shared";
 import { convertToBrowserAIMessages } from "../utils/convert-to-browser-ai-messages";
 import { gatherUnsupportedSettingWarnings } from "../utils/warnings";
-import { hasMultimodalContent, getExpectedInputs } from "../utils/prompt-utils";
+import { getMultimodalInfo } from "../utils/prompt-utils";
 import { SessionManager } from "./session-manager";
 
 export type BrowserAIChatModelId = "text";
@@ -149,8 +149,8 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 {
       );
     }
 
-    // Check if this is a multimodal prompt
-    const hasMultiModalInput = hasMultimodalContent(prompt);
+    // Detect multimodal content and collect expected inputs in one pass
+    const { hasMultiModalInput, expectedInputs } = getMultimodalInfo(prompt);
 
     // Convert messages to the DOM API format
     const { systemMessage, messages } = convertToBrowserAIMessages(prompt);
@@ -180,9 +180,7 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 {
       warnings,
       promptOptions,
       hasMultiModalInput,
-      expectedInputs: hasMultiModalInput
-        ? getExpectedInputs(prompt)
-        : undefined,
+      expectedInputs,
       functionTools,
     };
   }

diff --git a/packages/vercel/core/src/chat/session-manager.ts b/packages/vercel/core/src/chat/session-manager.ts
@@ -260,21 +260,6 @@ export class SessionManager {
       }
     }
 
-    // Remove any custom options that aren't part of the standard API
-    this.sanitizeOptions(mergedOptions);
-
     return mergedOptions;
   }
-
-  /**
-   * Removes custom options that aren't part of LanguageModel.create API
-   *
-   * @param options - Options object to sanitize in-place
-   * @private
-   */
-  private sanitizeOptions(
-    options: LanguageModelCreateOptions & Partial<CustomProviderOptions>,
-  ): void {
-    // Remove our custom options that the Prompt API doesn't understand
-  }
 }
diff --git a/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts b/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts
@@ -18,11 +18,7 @@ export interface ConvertedMessages {
 function convertBase64ToUint8Array(base64: string): Uint8Array {
   try {
     const binaryString = atob(base64);
-    const bytes = new Uint8Array(binaryString.length);
-    for (let i = 0; i < binaryString.length; i++) {
-      bytes[i] = binaryString.charCodeAt(i);
-    }
-    return bytes;
+    return Uint8Array.from(binaryString, (c) => c.charCodeAt(0));
   } catch (error) {
     throw new Error(`Failed to convert base64 to Uint8Array: ${error}`);
   }

diff --git a/packages/vercel/core/src/utils/prompt-utils.ts b/packages/vercel/core/src/utils/prompt-utils.ts
@@ -5,43 +5,16 @@
 import type { LanguageModelV3Prompt } from "@ai-sdk/provider";
 
 /**
- * Detect if the prompt contains multimodal content (images, audio)
- *
- * @param prompt - The prompt to check
- * @returns true if the prompt contains any file content
- */
-export function hasMultimodalContent(prompt: LanguageModelV3Prompt): boolean {
-  for (const message of prompt) {
-    if (message.role === "user") {
-      for (const part of message.content) {
-        if (part.type === "file") {
-          return true;
-        }
-      }
-    }
-  }
-  return false;
-}
-
-/**
- * Get expected inputs based on prompt content.
- * Analyzes the prompt to determine what types of inputs (text, image, audio) are used.
- * This information is used to configure the Prompt API session with the correct input capabilities.
+ * Detect multimodal content and collect expected input types in a single pass.
  *
  * @param prompt - The prompt to analyze
- * @returns Array of expected input types for session creation (only includes image/audio, text is assumed)
- * @example
- * ```typescript
- * const inputs = getExpectedInputs(prompt);
- * // Returns: [{ type: "image" }] if prompt contains images
- * // Returns: [] if prompt only contains text
- * ```
+ * @returns `hasMultiModalInput` (true when at least one image or audio input type was collected) and `expectedInputs` (the collected input types, or undefined when none were collected)
  */
-export function getExpectedInputs(
-  prompt: LanguageModelV3Prompt,
-): Array<{ type: "text" | "image" | "audio" }> {
-  const inputs = new Set<"text" | "image" | "audio">();
-  // Don't add text by default - it's assumed by the Prompt API
+export function getMultimodalInfo(prompt: LanguageModelV3Prompt): {
+  hasMultiModalInput: boolean;
+  expectedInputs: Array<{ type: "text" | "image" | "audio" }> | undefined;
+} {
+  const inputs = new Set<"image" | "audio">();
 
   for (const message of prompt) {
     if (message.role === "user") {
@@ -57,7 +30,13 @@ export function getExpectedInputs(
     }
   }
 
-  return Array.from(inputs).map((type) => ({ type }));
+  const hasMultiModalInput = inputs.size > 0;
+  return {
+    hasMultiModalInput,
+    expectedInputs: hasMultiModalInput
+      ? Array.from(inputs, (type) => ({ type }))
+      : undefined,
+  };
 }
 
 /**