Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/happy-badgers-create.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browser-ai/core": patch
---

perf: reduce prompt traversals and optimize base64 conversion
10 changes: 4 additions & 6 deletions packages/vercel/core/src/chat/browser-ai-language-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
} from "@browser-ai/shared";
import { convertToBrowserAIMessages } from "../utils/convert-to-browser-ai-messages";
import { gatherUnsupportedSettingWarnings } from "../utils/warnings";
import { hasMultimodalContent, getExpectedInputs } from "../utils/prompt-utils";
import { getMultimodalInfo } from "../utils/prompt-utils";
import { SessionManager } from "./session-manager";

export type BrowserAIChatModelId = "text";
Expand Down Expand Up @@ -149,8 +149,8 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 {
);
}

// Check if this is a multimodal prompt
const hasMultiModalInput = hasMultimodalContent(prompt);
// Detect multimodal content and collect expected inputs in one pass
const { hasMultiModalInput, expectedInputs } = getMultimodalInfo(prompt);

// Convert messages to the DOM API format
const { systemMessage, messages } = convertToBrowserAIMessages(prompt);
Expand Down Expand Up @@ -180,9 +180,7 @@ export class BrowserAIChatLanguageModel implements LanguageModelV3 {
warnings,
promptOptions,
hasMultiModalInput,
expectedInputs: hasMultiModalInput
? getExpectedInputs(prompt)
: undefined,
expectedInputs,
functionTools,
};
}
Expand Down
15 changes: 0 additions & 15 deletions packages/vercel/core/src/chat/session-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,6 @@ export class SessionManager {
}
}

// Remove any custom options that aren't part of the standard API
this.sanitizeOptions(mergedOptions);

return mergedOptions;
}

/**
* Removes custom options that aren't part of LanguageModel.create API
*
* NOTE(review): the body currently contains no statements, so calling this
* method leaves `options` unmodified. The description above states the
* intended purpose, not behavior that is implemented here.
*
* @param options - Options object to sanitize in-place
* @private
*/
private sanitizeOptions(
options: LanguageModelCreateOptions & Partial<CustomProviderOptions>,
): void {
// Remove our custom options that the Prompt API doesn't understand
// (placeholder only: no removal logic follows this comment)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,7 @@ export interface ConvertedMessages {
function convertBase64ToUint8Array(base64: string): Uint8Array {
try {
const binaryString = atob(base64);
const bytes = new Uint8Array(binaryString.length);
for (let i = 0; i < binaryString.length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes;
return Uint8Array.from(binaryString, (c) => c.charCodeAt(0));
} catch (error) {
throw new Error(`Failed to convert base64 to Uint8Array: ${error}`);
}
Expand Down
49 changes: 14 additions & 35 deletions packages/vercel/core/src/utils/prompt-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,16 @@
import type { LanguageModelV3Prompt } from "@ai-sdk/provider";

/**
 * Report whether any user message in the prompt carries a file part
 * (the representation used for images and audio).
 *
 * Only messages with role "user" are inspected; file parts attached to
 * other roles do not count.
 *
 * @param prompt - The prompt whose messages are scanned
 * @returns true as soon as one user message contains a part of type "file",
 *          false when there is none (including for an empty prompt)
 */
export function hasMultimodalContent(prompt: LanguageModelV3Prompt): boolean {
  return prompt.some((entry) => {
    // Non-user messages never contribute to multimodal detection.
    if (entry.role !== "user") {
      return false;
    }
    return entry.content.some((piece) => piece.type === "file");
  });
}

/**
* Get expected inputs based on prompt content.
* Analyzes the prompt to determine what types of inputs (text, image, audio) are used.
* This information is used to configure the Prompt API session with the correct input capabilities.
* Detect multimodal content and collect expected input types in a single pass.
*
* @param prompt - The prompt to analyze
* @returns Array of expected input types for session creation (only includes image/audio, text is assumed)
* @example
* ```typescript
* const inputs = getExpectedInputs(prompt);
* // Returns: [{ type: "image" }] if prompt contains images
* // Returns: [] if prompt only contains text
* ```
* @returns hasMultiModalInput flag and the expectedInputs array (undefined when text-only)
*/
export function getExpectedInputs(
prompt: LanguageModelV3Prompt,
): Array<{ type: "text" | "image" | "audio" }> {
const inputs = new Set<"text" | "image" | "audio">();
// Don't add text by default - it's assumed by the Prompt API
export function getMultimodalInfo(prompt: LanguageModelV3Prompt): {
hasMultiModalInput: boolean;
expectedInputs: Array<{ type: "text" | "image" | "audio" }> | undefined;
} {
const inputs = new Set<"image" | "audio">();

for (const message of prompt) {
if (message.role === "user") {
Expand All @@ -57,7 +30,13 @@ export function getExpectedInputs(
}
}

return Array.from(inputs).map((type) => ({ type }));
const hasMultiModalInput = inputs.size > 0;
return {
hasMultiModalInput,
expectedInputs: hasMultiModalInput
? Array.from(inputs, (type) => ({ type }))
: undefined,
};
}

/**
Expand Down
Loading