Skip to content

Commit 8e8ad54

Browse files
authored
🤖 fix: place Anthropic cache marker on last message (#783)
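Previously, the Anthropic cache marker was placed on the second-to-last message, which cached the conversation up to (but not including) the current message. This change moves the marker to the last message so that the entire conversation, including the current message, is cached. _Generated with `mux`_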
1 parent d7560e1 commit 8e8ad54

File tree

3 files changed

+45
-32
lines changed

3 files changed

+45
-32
lines changed

src/common/utils/ai/cacheStrategy.test.ts

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,22 @@ describe("cacheStrategy", () => {
4141
expect(result).toEqual(messages);
4242
});
4343

44-
it("should not modify messages if less than 2 messages", () => {
44+
it("should add cache control to single message for Anthropic models", () => {
4545
const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
4646
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
47-
expect(result).toEqual(messages);
47+
expect(result[0]).toEqual({
48+
...messages[0],
49+
providerOptions: {
50+
anthropic: {
51+
cacheControl: {
52+
type: "ephemeral",
53+
},
54+
},
55+
},
56+
});
4857
});
4958

50-
it("should add cache control to second-to-last message for Anthropic models", () => {
59+
it("should add cache control to last message for Anthropic models", () => {
5160
const messages: ModelMessage[] = [
5261
{ role: "user", content: "Hello" },
5362
{ role: "assistant", content: "Hi there!" },
@@ -56,9 +65,10 @@ describe("cacheStrategy", () => {
5665
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
5766

5867
expect(result[0]).toEqual(messages[0]); // First message unchanged
59-
expect(result[1]).toEqual({
60-
// Second message has cache control
61-
...messages[1],
68+
expect(result[1]).toEqual(messages[1]); // Second message unchanged
69+
expect(result[2]).toEqual({
70+
// Last message has cache control
71+
...messages[2],
6272
providerOptions: {
6373
anthropic: {
6474
cacheControl: {
@@ -67,7 +77,6 @@ describe("cacheStrategy", () => {
6777
},
6878
},
6979
});
70-
expect(result[2]).toEqual(messages[2]); // Last message unchanged
7180
});
7281

7382
it("should work with exactly 2 messages", () => {
@@ -77,9 +86,10 @@ describe("cacheStrategy", () => {
7786
];
7887
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
7988

80-
expect(result[0]).toEqual({
81-
// First message gets cache control
82-
...messages[0],
89+
expect(result[0]).toEqual(messages[0]); // First message unchanged
90+
expect(result[1]).toEqual({
91+
// Last message gets cache control
92+
...messages[1],
8393
providerOptions: {
8494
anthropic: {
8595
cacheControl: {
@@ -88,7 +98,6 @@ describe("cacheStrategy", () => {
8898
},
8999
},
90100
});
91-
expect(result[1]).toEqual(messages[1]); // Last message unchanged
92101
});
93102

94103
it("should add cache control to last content part for array content", () => {
@@ -108,17 +117,24 @@ describe("cacheStrategy", () => {
108117
{ type: "text", text: "How can I help?" },
109118
],
110119
},
111-
{ role: "user", content: "Final question" },
120+
{
121+
role: "user",
122+
content: [
123+
{ type: "text", text: "Final" },
124+
{ type: "text", text: "question" },
125+
],
126+
},
112127
];
113128
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
114129

115130
expect(result[0]).toEqual(messages[0]); // First message unchanged
131+
expect(result[1]).toEqual(messages[1]); // Second message unchanged
116132

117-
// Second message (array content): cache control on LAST content part only
118-
const secondMsg = result[1];
119-
expect(secondMsg.role).toBe("assistant");
120-
expect(Array.isArray(secondMsg.content)).toBe(true);
121-
const content = secondMsg.content as Array<{
133+
// Last message (array content): cache control on LAST content part only
134+
const lastMsg = result[2];
135+
expect(lastMsg.role).toBe("user");
136+
expect(Array.isArray(lastMsg.content)).toBe(true);
137+
const content = lastMsg.content as Array<{
122138
type: string;
123139
text: string;
124140
providerOptions?: unknown;
@@ -127,8 +143,6 @@ describe("cacheStrategy", () => {
127143
expect(content[1].providerOptions).toEqual({
128144
anthropic: { cacheControl: { type: "ephemeral" } },
129145
}); // Last part has cache control
130-
131-
expect(result[2]).toEqual(messages[2]); // Last message unchanged
132146
});
133147
});
134148

src/common/utils/ai/cacheStrategy.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,25 +66,24 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
6666

6767
/**
6868
* Apply cache control to messages for Anthropic models.
69-
* Caches all messages except the last user message for optimal cache hits.
69+
* Adds a cache marker to the last message so the entire conversation is cached.
7070
*
7171
* NOTE: The SDK requires providerOptions on content parts, not on the message.
72-
* We add cache_control to the last content part of the second-to-last message.
72+
* We add cache_control to the last content part of the last message.
7373
*/
7474
export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
7575
// Only apply cache control for Anthropic models
7676
if (!supportsAnthropicCache(modelString)) {
7777
return messages;
7878
}
7979

80-
// Need at least 2 messages to add a cache breakpoint
81-
if (messages.length < 2) {
80+
// Need at least 1 message to add a cache breakpoint
81+
if (messages.length < 1) {
8282
return messages;
8383
}
8484

85-
// Add cache breakpoint at the second-to-last message
86-
// This caches everything up to (but not including) the current user message
87-
const cacheIndex = messages.length - 2;
85+
// Add cache breakpoint at the last message
86+
const cacheIndex = messages.length - 1;
8887

8988
return messages.map((msg, index) => {
9089
if (index === cacheIndex) {

src/node/services/aiService.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ if (typeof globalFetchWithExtras.certificate === "function") {
102102
*
103103
* Injects cache_control on:
104104
* 1. Last tool (caches all tool definitions)
105-
* 2. Second-to-last message's last content part (caches conversation history)
105+
* 2. Last message's last content part (caches entire conversation)
106106
*/
107107
function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
108108
const cachingFetch = async (
@@ -123,11 +123,11 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
123123
lastTool.cache_control ??= { type: "ephemeral" };
124124
}
125125

126-
// Inject cache_control on second-to-last message's last content part
127-
// This caches conversation history up to (but not including) the current user message
128-
if (Array.isArray(json.messages) && json.messages.length >= 2) {
129-
const secondToLastMsg = json.messages[json.messages.length - 2] as Record<string, unknown>;
130-
const content = secondToLastMsg.content;
126+
// Inject cache_control on last message's last content part
127+
// This caches the entire conversation
128+
if (Array.isArray(json.messages) && json.messages.length >= 1) {
129+
const lastMsg = json.messages[json.messages.length - 1] as Record<string, unknown>;
130+
const content = lastMsg.content;
131131

132132
if (Array.isArray(content) && content.length > 0) {
133133
// Array content: add cache_control to last part

0 commit comments

Comments
 (0)