diff --git a/.changeset/happy-badgers-create.md b/.changeset/happy-badgers-create.md new file mode 100644 index 0000000..64d2a60 --- /dev/null +++ b/.changeset/happy-badgers-create.md @@ -0,0 +1,5 @@ +--- +"@browser-ai/core": patch +--- + +perf: reduce prompt traversals and optimize base64 conversion diff --git a/packages/vercel/core/src/chat/browser-ai-language-model.ts b/packages/vercel/core/src/chat/browser-ai-language-model.ts index a21c0c1..2e13739 100644 --- a/packages/vercel/core/src/chat/browser-ai-language-model.ts +++ b/packages/vercel/core/src/chat/browser-ai-language-model.ts @@ -21,7 +21,7 @@ import { } from "@browser-ai/shared"; import { convertToBrowserAIMessages } from "../utils/convert-to-browser-ai-messages"; import { gatherUnsupportedSettingWarnings } from "../utils/warnings"; -import { hasMultimodalContent, getExpectedInputs } from "../utils/prompt-utils"; +import { getMultimodalInfo } from "../utils/prompt-utils"; import { SessionManager } from "./session-manager"; export type BrowserAIChatModelId = "text"; @@ -149,8 +149,8 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 { ); } - // Check if this is a multimodal prompt - const hasMultiModalInput = hasMultimodalContent(prompt); + // Detect multimodal content and collect expected inputs in one pass + const { hasMultiModalInput, expectedInputs } = getMultimodalInfo(prompt); // Convert messages to the DOM API format const { systemMessage, messages } = convertToBrowserAIMessages(prompt); @@ -180,9 +180,7 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 { warnings, promptOptions, hasMultiModalInput, - expectedInputs: hasMultiModalInput - ? getExpectedInputs(prompt) - : undefined, + expectedInputs, functionTools, }; } diff --git a/packages/vercel/core/src/chat/session-manager.ts b/packages/vercel/core/src/chat/session-manager.ts index 1224bc5..1cc72ef 100644 --- a/packages/vercel/core/src/chat/session-manager.ts +++ b/packages/vercel/core/src/chat/session-manager.ts @@ -260,21 +260,6 @@ export class SessionManager { } } - // Remove any custom options that aren't part of the standard API - this.sanitizeOptions(mergedOptions); - return mergedOptions; } - - /** - * Removes custom options that aren't part of LanguageModel.create API - * - * @param options - Options object to sanitize in-place - * @private - */ - private sanitizeOptions( - options: LanguageModelCreateOptions & Partial, - ): void { - // Remove our custom options that the Prompt API doesn't understand - } } diff --git a/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts b/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts index 5d5a073..2a093e6 100644 --- a/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts +++ b/packages/vercel/core/src/utils/convert-to-browser-ai-messages.ts @@ -18,11 +18,7 @@ export interface ConvertedMessages { function convertBase64ToUint8Array(base64: string): Uint8Array { try { const binaryString = atob(base64); - const bytes = new Uint8Array(binaryString.length); - for (let i = 0; i < binaryString.length; i++) { - bytes[i] = binaryString.charCodeAt(i); - } - return bytes; + return Uint8Array.from(binaryString, (c) => c.charCodeAt(0)); } catch (error) { throw new Error(`Failed to convert base64 to Uint8Array: ${error}`); } diff --git a/packages/vercel/core/src/utils/prompt-utils.ts b/packages/vercel/core/src/utils/prompt-utils.ts index 8eb3d4a..fa594e6 100644 --- a/packages/vercel/core/src/utils/prompt-utils.ts +++ b/packages/vercel/core/src/utils/prompt-utils.ts @@ -5,43 +5,16 @@ import type { LanguageModelV3Prompt } from "@ai-sdk/provider"; /** - * Detect if the prompt contains multimodal content (images, audio) - * - * @param prompt - The prompt to check - * @returns true if the prompt contains any file content - */ -export function hasMultimodalContent(prompt: LanguageModelV3Prompt): boolean { - for (const message of prompt) { - if (message.role === "user") { - for (const part of message.content) { - if (part.type === "file") { - return true; - } - } - } - } - return false; -} - -/** - * Get expected inputs based on prompt content. - * Analyzes the prompt to determine what types of inputs (text, image, audio) are used. - * This information is used to configure the Prompt API session with the correct input capabilities. + * Detect multimodal content and collect expected input types in a single pass. * * @param prompt - The prompt to analyze - * @returns Array of expected input types for session creation (only includes image/audio, text is assumed) - * @example - * ```typescript - * const inputs = getExpectedInputs(prompt); - * // Returns: [{ type: "image" }] if prompt contains images - * // Returns: [] if prompt only contains text - * ``` + * @returns hasMultiModalInput flag and the expectedInputs array (undefined when text-only) */ -export function getExpectedInputs( - prompt: LanguageModelV3Prompt, -): Array<{ type: "text" | "image" | "audio" }> { - const inputs = new Set<"text" | "image" | "audio">(); - // Don't add text by default - it's assumed by the Prompt API +export function getMultimodalInfo(prompt: LanguageModelV3Prompt): { + hasMultiModalInput: boolean; + expectedInputs: Array<{ type: "text" | "image" | "audio" }> | undefined; +} { + const inputs = new Set<"image" | "audio">(); for (const message of prompt) { if (message.role === "user") { @@ -57,7 +30,13 @@ export function getExpectedInputs( } } - return Array.from(inputs).map((type) => ({ type })); + const hasMultiModalInput = inputs.size > 0; + return { + hasMultiModalInput, + expectedInputs: hasMultiModalInput + ? Array.from(inputs, (type) => ({ type })) + : undefined, + }; } /**