Skip to content

Commit 15d1de9

Browse files
authored
fix: anthropic cache improvements (#781)
The SDK does not translate `providerOptions.anthropic.cacheControl` into raw `cache_control` for tools or content parts. This is fixed with an HTTP-level fetch wrapper that injects `cache_control` on the last tool and on the second-to-last message. Also: cache detection is extended to gateway providers (`mux-gateway:anthropic/...`), array content handling is fixed, and tools are now recreated via `createTool()` (the SDK requires providerOptions at creation time).
1 parent 580111d commit 15d1de9

File tree

6 files changed

+292
-48
lines changed

6 files changed

+292
-48
lines changed

src/common/constants/providers.test.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ describe("Provider Registry", () => {
1010
expect(Object.keys(PROVIDER_REGISTRY).length).toBeGreaterThan(0);
1111
});
1212

13-
test("all registry values are import functions", () => {
14-
// Registry should map provider names to async import functions
13+
test("all registry values are import functions that return promises", () => {
14+
// Registry should map provider names to functions returning promises
1515
for (const importFn of Object.values(PROVIDER_REGISTRY)) {
1616
expect(typeof importFn).toBe("function");
17-
expect(importFn.constructor.name).toBe("AsyncFunction");
17+
// Verify calling the function returns a Promise (don't await - this is a runtime instanceof check on the return value only)
18+
const result = importFn();
19+
expect(result).toBeInstanceOf(Promise);
1820
}
1921
});
2022

src/common/constants/providers.ts

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,56 +9,56 @@
99
/**
1010
* Dynamically import the Anthropic provider package
1111
*/
12-
export async function importAnthropic() {
13-
return await import("@ai-sdk/anthropic");
12+
export function importAnthropic() {
13+
return import("@ai-sdk/anthropic");
1414
}
1515

1616
/**
1717
* Dynamically import the OpenAI provider package
1818
*/
19-
export async function importOpenAI() {
20-
return await import("@ai-sdk/openai");
19+
export function importOpenAI() {
20+
return import("@ai-sdk/openai");
2121
}
2222

2323
/**
2424
* Dynamically import the Ollama provider package
2525
*/
26-
export async function importOllama() {
27-
return await import("ollama-ai-provider-v2");
26+
export function importOllama() {
27+
return import("ollama-ai-provider-v2");
2828
}
2929

3030
/**
3131
* Dynamically import the Google provider package
3232
*/
33-
export async function importGoogle() {
34-
return await import("@ai-sdk/google");
33+
export function importGoogle() {
34+
return import("@ai-sdk/google");
3535
}
3636

3737
/**
3838
* Dynamically import the OpenRouter provider package
3939
*/
40-
export async function importOpenRouter() {
41-
return await import("@openrouter/ai-sdk-provider");
40+
export function importOpenRouter() {
41+
return import("@openrouter/ai-sdk-provider");
4242
}
4343

4444
/**
4545
* Dynamically import the xAI provider package
4646
*/
47-
export async function importXAI() {
48-
return await import("@ai-sdk/xai");
47+
export function importXAI() {
48+
return import("@ai-sdk/xai");
4949
}
5050

5151
/**
5252
* Dynamically import the Amazon Bedrock provider package
5353
*/
54-
export async function importBedrock() {
54+
export function importBedrock() {
5555
return import("@ai-sdk/amazon-bedrock");
5656
}
5757

5858
/**
5959
* Dynamically import the Gateway provider from the AI SDK
6060
*/
61-
export async function importMuxGateway() {
61+
export function importMuxGateway() {
6262
return import("ai");
6363
}
6464

src/common/utils/ai/cacheStrategy.test.ts

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,22 @@ import {
1111

1212
describe("cacheStrategy", () => {
1313
describe("supportsAnthropicCache", () => {
14-
it("should return true for Anthropic models", () => {
14+
it("should return true for direct Anthropic models", () => {
1515
expect(supportsAnthropicCache("anthropic:claude-3-5-sonnet-20241022")).toBe(true);
1616
expect(supportsAnthropicCache("anthropic:claude-3-5-haiku-20241022")).toBe(true);
1717
});
1818

19+
it("should return true for gateway providers routing to Anthropic", () => {
20+
expect(supportsAnthropicCache("mux-gateway:anthropic/claude-opus-4-5")).toBe(true);
21+
expect(supportsAnthropicCache("mux-gateway:anthropic/claude-sonnet-4-5-20250514")).toBe(true);
22+
expect(supportsAnthropicCache("openrouter:anthropic/claude-3.5-sonnet")).toBe(true);
23+
});
24+
1925
it("should return false for non-Anthropic models", () => {
2026
expect(supportsAnthropicCache("openai:gpt-4")).toBe(false);
2127
expect(supportsAnthropicCache("google:gemini-2.0")).toBe(false);
2228
expect(supportsAnthropicCache("openrouter:meta-llama/llama-3.1")).toBe(false);
29+
expect(supportsAnthropicCache("mux-gateway:openai/gpt-5.1")).toBe(false);
2330
});
2431
});
2532

@@ -83,6 +90,46 @@ describe("cacheStrategy", () => {
8390
});
8491
expect(result[1]).toEqual(messages[1]); // Last message unchanged
8592
});
93+
94+
it("should add cache control to last content part for array content", () => {
95+
// Messages with array content (typical for user/assistant with multiple parts)
96+
const messages: ModelMessage[] = [
97+
{
98+
role: "user",
99+
content: [
100+
{ type: "text", text: "Hello" },
101+
{ type: "text", text: "World" },
102+
],
103+
},
104+
{
105+
role: "assistant",
106+
content: [
107+
{ type: "text", text: "Hi there!" },
108+
{ type: "text", text: "How can I help?" },
109+
],
110+
},
111+
{ role: "user", content: "Final question" },
112+
];
113+
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
114+
115+
expect(result[0]).toEqual(messages[0]); // First message unchanged
116+
117+
// Second message (array content): cache control on LAST content part only
118+
const secondMsg = result[1];
119+
expect(secondMsg.role).toBe("assistant");
120+
expect(Array.isArray(secondMsg.content)).toBe(true);
121+
const content = secondMsg.content as Array<{
122+
type: string;
123+
text: string;
124+
providerOptions?: unknown;
125+
}>;
126+
expect(content[0].providerOptions).toBeUndefined(); // First part unchanged
127+
expect(content[1].providerOptions).toEqual({
128+
anthropic: { cacheControl: { type: "ephemeral" } },
129+
}); // Last part has cache control
130+
131+
expect(result[2]).toEqual(messages[2]); // Last message unchanged
132+
});
86133
});
87134

88135
describe("createCachedSystemMessage", () => {
@@ -198,5 +245,48 @@ describe("cacheStrategy", () => {
198245
applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
199246
expect(mockTools).toEqual(originalTools);
200247
});
248+
249+
it("should handle provider-defined tools without recreating them", () => {
250+
// Provider-defined tools (like Anthropic's webSearch) have type: "provider-defined"
251+
// and cannot be recreated with createTool() - they have special internal properties
252+
const providerDefinedTool = {
253+
type: "provider-defined" as const,
254+
id: "web_search",
255+
name: "web_search_20250305",
256+
args: { maxUses: 1000 },
257+
// Note: no description or execute - these are handled internally by the SDK
258+
};
259+
260+
const toolsWithProviderDefined: Record<string, Tool> = {
261+
readFile: tool({
262+
description: "Read a file",
263+
inputSchema: z.object({ path: z.string() }),
264+
execute: () => Promise.resolve({ success: true }),
265+
}),
266+
// Provider-defined tool as last tool (typical for Anthropic web search)
267+
web_search: providerDefinedTool as unknown as Tool,
268+
};
269+
270+
const result = applyCacheControlToTools(
271+
toolsWithProviderDefined,
272+
"anthropic:claude-3-5-sonnet"
273+
);
274+
275+
// Verify all tools are present
276+
expect(Object.keys(result)).toEqual(Object.keys(toolsWithProviderDefined));
277+
278+
// First tool should be unchanged
279+
expect(result.readFile).toEqual(toolsWithProviderDefined.readFile);
280+
281+
// Provider-defined tool should have cache control added but retain its type
282+
const cachedWebSearch = result.web_search as unknown as {
283+
type: string;
284+
providerOptions: unknown;
285+
};
286+
expect(cachedWebSearch.type).toBe("provider-defined");
287+
expect(cachedWebSearch.providerOptions).toEqual({
288+
anthropic: { cacheControl: { type: "ephemeral" } },
289+
});
290+
});
201291
});
202292
});

src/common/utils/ai/cacheStrategy.ts

Lines changed: 98 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,75 @@
1-
import type { ModelMessage, Tool } from "ai";
1+
import { tool as createTool, type ModelMessage, type Tool } from "ai";
22

33
/**
4-
* Check if a model supports Anthropic cache control
4+
* Check if a model supports Anthropic cache control.
5+
* Matches:
6+
* - Direct Anthropic provider: "anthropic:claude-opus-4-5"
7+
* - Gateway providers routing to Anthropic: "mux-gateway:anthropic/claude-opus-4-5"
8+
* - OpenRouter Anthropic models: "openrouter:anthropic/claude-3.5-sonnet"
59
*/
610
export function supportsAnthropicCache(modelString: string): boolean {
7-
return modelString.startsWith("anthropic:");
11+
// Direct Anthropic provider
12+
if (modelString.startsWith("anthropic:")) {
13+
return true;
14+
}
15+
// Gateway/router providers routing to Anthropic (format: "provider:anthropic/model")
16+
const [, modelId] = modelString.split(":");
17+
if (modelId?.startsWith("anthropic/")) {
18+
return true;
19+
}
20+
return false;
21+
}
22+
23+
/** Cache control providerOptions for Anthropic */
24+
const ANTHROPIC_CACHE_CONTROL = {
25+
anthropic: {
26+
cacheControl: { type: "ephemeral" as const },
27+
},
28+
};
29+
30+
/**
31+
* Add providerOptions to the last content part of a message.
32+
* The SDK requires providerOptions on content parts, not on the message itself.
33+
*
34+
* For system messages with string content, we use message-level providerOptions
35+
* (which the SDK handles correctly). For user/assistant messages with array
36+
* content, we add providerOptions to the last content part.
37+
*/
38+
function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
39+
const content = msg.content;
40+
41+
// String content (typically system messages): use message-level providerOptions
42+
// The SDK correctly translates this for system messages
43+
if (typeof content === "string") {
44+
return {
45+
...msg,
46+
providerOptions: ANTHROPIC_CACHE_CONTROL,
47+
};
48+
}
49+
50+
// Array content: add providerOptions to the last part
51+
// Use type assertion since we're adding providerOptions which is valid but not in base types
52+
if (Array.isArray(content) && content.length > 0) {
53+
const lastIndex = content.length - 1;
54+
const newContent = content.map((part, i) =>
55+
i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
56+
);
57+
// Type assertion needed: ModelMessage types are strict unions but providerOptions
58+
// on content parts is valid per SDK docs
59+
const result = { ...msg, content: newContent };
60+
return result as ModelMessage;
61+
}
62+
63+
// Empty or unexpected content: return as-is
64+
return msg;
865
}
966

1067
/**
1168
* Apply cache control to messages for Anthropic models.
1269
* Caches all messages except the last user message for optimal cache hits.
70+
*
71+
* NOTE: The SDK requires providerOptions on content parts, not on the message.
72+
* We add cache_control to the last content part of the second-to-last message.
1373
*/
1474
export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
1575
// Only apply cache control for Anthropic models
@@ -28,16 +88,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
2888

2989
return messages.map((msg, index) => {
3090
if (index === cacheIndex) {
31-
return {
32-
...msg,
33-
providerOptions: {
34-
anthropic: {
35-
cacheControl: {
36-
type: "ephemeral" as const,
37-
},
38-
},
39-
},
40-
};
91+
return addCacheControlToLastContentPart(msg);
4192
}
4293
return msg;
4394
});
@@ -77,6 +128,9 @@ export function createCachedSystemMessage(
77128
* 2. Conversation history (1 breakpoint)
78129
* 3. Last tool only (1 breakpoint) - caches all tools up to and including this one
79130
* = 3 total, leaving 1 for future use
131+
*
132+
* NOTE: The SDK requires providerOptions to be passed during tool() creation,
133+
* not added afterwards. We re-create the last tool with providerOptions included.
80134
*/
81135
export function applyCacheControlToTools<T extends Record<string, Tool>>(
82136
tools: T,
@@ -95,23 +149,41 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
95149
// Anthropic caches everything up to the cache breakpoint, so marking
96150
// only the last tool will cache all tools
97151
const cachedTools = {} as unknown as T;
98-
for (const [key, tool] of Object.entries(tools)) {
152+
for (const [key, existingTool] of Object.entries(tools)) {
99153
if (key === lastToolKey) {
100-
// Last tool gets cache control
101-
const cachedTool = {
102-
...tool,
103-
providerOptions: {
104-
anthropic: {
105-
cacheControl: {
106-
type: "ephemeral" as const,
154+
// For provider-defined tools (like Anthropic's webSearch), we cannot recreate them
155+
// with createTool() - they have special properties. Instead, spread providerOptions
156+
// directly onto the tool object. While this doesn't work for regular tools (SDK
157+
// requires providerOptions at creation time), provider-defined tools handle it.
158+
const isProviderDefinedTool = (existingTool as { type?: string }).type === "provider-defined";
159+
160+
if (isProviderDefinedTool) {
161+
// Provider-defined tools: add providerOptions directly (SDK handles it differently)
162+
cachedTools[key as keyof T] = {
163+
...existingTool,
164+
providerOptions: {
165+
anthropic: {
166+
cacheControl: { type: "ephemeral" },
107167
},
108168
},
109-
},
110-
};
111-
cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
169+
} as unknown as T[keyof T];
170+
} else {
171+
// Regular tools: re-create with providerOptions (SDK requires this at creation time)
172+
const cachedTool = createTool({
173+
description: existingTool.description,
174+
inputSchema: existingTool.inputSchema,
175+
execute: existingTool.execute,
176+
providerOptions: {
177+
anthropic: {
178+
cacheControl: { type: "ephemeral" },
179+
},
180+
},
181+
});
182+
cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
183+
}
112184
} else {
113-
// Other tools are copied as-is (use unknown for type safety)
114-
cachedTools[key as keyof T] = tool as unknown as T[keyof T];
185+
// Other tools are copied as-is
186+
cachedTools[key as keyof T] = existingTool as unknown as T[keyof T];
115187
}
116188
}
117189

src/common/utils/tools/tools.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ export async function getToolsForModel(
125125
const { anthropic } = await import("@ai-sdk/anthropic");
126126
allTools = {
127127
...baseTools,
128-
web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }),
128+
// Type assertion needed due to SDK version mismatch between ai and @ai-sdk/anthropic
129+
web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }) as Tool,
129130
};
130131
break;
131132
}
@@ -136,9 +137,10 @@ export async function getToolsForModel(
136137
const { openai } = await import("@ai-sdk/openai");
137138
allTools = {
138139
...baseTools,
140+
// Type assertion needed due to SDK version mismatch between ai and @ai-sdk/openai
139141
web_search: openai.tools.webSearch({
140142
searchContextSize: "high",
141-
}),
143+
}) as Tool,
142144
};
143145
}
144146
break;

0 commit comments

Comments
 (0)