
Commit a602031

server/llm: improve token counting
1 parent 1c01445 commit a602031

6 files changed: +297 additions, −92 deletions

6 files changed

+297
-92
lines changed
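
The four evaluate* functions changed below (anthropic.ts, custom-openai.ts, google-lc.ts, mistral.ts) all get the same refactor: accumulate the streamed AIMessageChunk objects with concat and, when the provider reports usage_metadata through LangChain, use those exact token counts instead of the approximate tokenizer. Here is a minimal, self-contained sketch of that pattern; names like collectWithTokenCounts, numTokens, and onToken are stand-ins for illustration, not part of this commit.

```typescript
// Minimal sketch of the pattern these diffs apply; not CoCalc's actual code.
// Assumptions: `chunks` is whatever chain.stream()/model.stream() returned,
// `numTokens` stands in for the approximate GPT-3 tokenizer used as the
// fallback, and `onToken` plays the role of opts.stream.
import { AIMessageChunk } from "@langchain/core/messages";
import { concat } from "@langchain/core/utils/stream";

export async function collectWithTokenCounts(
  chunks: AsyncIterable<AIMessageChunk>,
  input: string,
  numTokens: (s: string) => number,
  onToken?: (token: string | null) => void,
) {
  let finalResult: AIMessageChunk | undefined;
  let output = "";
  for await (const chunk of chunks) {
    const { content } = chunk;
    if (typeof content !== "string") continue;
    output += content;
    onToken?.(content);

    // Accumulate every chunk so the merged result ends up carrying the
    // provider-reported usage metadata.
    finalResult = finalResult ? concat(finalResult, chunk) : chunk;
  }
  onToken?.(null);

  // Prefer exact counts from usage_metadata when the provider supplies them.
  const usage = finalResult?.usage_metadata;
  if (usage) {
    return {
      output,
      prompt_tokens: usage.input_tokens,
      completion_tokens: usage.output_tokens,
      total_tokens: usage.total_tokens,
    };
  }

  // Otherwise fall back to the tokenizer approximation, as before this commit.
  const prompt_tokens = numTokens(input);
  const completion_tokens = numTokens(output);
  return {
    output,
    prompt_tokens,
    completion_tokens,
    total_tokens: prompt_tokens + completion_tokens,
  };
}
```

In the real files the loop lives inside the existing RunnableWithMessageHistory chains, and the fallback prompt count additionally includes historyTokens for the chat history.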

src/packages/server/llm/anthropic.ts

Lines changed: 45 additions & 10 deletions
```diff
@@ -1,9 +1,11 @@
 import { ChatAnthropic } from "@langchain/anthropic";
+import { AIMessageChunk } from "@langchain/core/messages";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
+import { concat } from "@langchain/core/utils/stream";

 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -108,24 +110,57 @@ export async function evaluateAnthropic(

   const chunks = await chainWithHistory.stream({ input });

+  let finalResult: AIMessageChunk | undefined;
   let output = "";
   for await (const chunk of chunks) {
     const { content } = chunk;
     if (typeof content !== "string") continue;
     output += content;
     opts.stream?.(content);
+
+    // Collect the final result to check for usage metadata
+    if (finalResult) {
+      finalResult = concat(finalResult, chunk);
+    } else {
+      finalResult = chunk;
+    }
   }

   opts.stream?.(null);

-  // we use that GPT3 tokenizer to get an approximate number of tokens
-  const prompt_tokens = numTokens(input) + historyTokens;
-  const completion_tokens = numTokens(output);
-
-  return {
-    output,
-    total_tokens: prompt_tokens + completion_tokens,
-    completion_tokens,
-    prompt_tokens,
-  };
+  // Check for usage metadata from LangChain first (more accurate)
+  const usage_metadata = finalResult?.usage_metadata;
+  log.debug("usage_metadata", usage_metadata);
+
+  if (usage_metadata) {
+    const { input_tokens, output_tokens, total_tokens } = usage_metadata;
+    log.debug("evaluateAnthropic successful (using usage_metadata)", {
+      input_tokens,
+      output_tokens,
+      total_tokens,
+    });
+
+    return {
+      output,
+      total_tokens,
+      completion_tokens: output_tokens,
+      prompt_tokens: input_tokens,
+    };
+  } else {
+    // Fallback to manual token counting (approximation using GPT-3 tokenizer)
+    const prompt_tokens = numTokens(input) + historyTokens;
+    const completion_tokens = numTokens(output);
+
+    log.debug("evaluateAnthropic successful (using manual counting)", {
+      prompt_tokens,
+      completion_tokens,
+    });
+
+    return {
+      output,
+      total_tokens: prompt_tokens + completion_tokens,
+      completion_tokens,
+      prompt_tokens,
+    };
+  }
 }
```
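
Why every chunk is folded into finalResult instead of only keeping the last one: concat merges AIMessageChunks, including (in current @langchain/core) their usage_metadata, so the counts survive no matter which streamed chunk carried them. A tiny illustration with hand-made chunks follows; this is not real Anthropic output, and the exact merge behavior is an assumption worth checking against your @langchain/core version.

```typescript
import { AIMessageChunk } from "@langchain/core/messages";
import { concat } from "@langchain/core/utils/stream";

// Hand-constructed chunks for illustration only.
const first = new AIMessageChunk({
  content: "Hello ",
  usage_metadata: { input_tokens: 12, output_tokens: 2, total_tokens: 14 },
});
const second = new AIMessageChunk({
  content: "world",
  usage_metadata: { input_tokens: 0, output_tokens: 3, total_tokens: 3 },
});

const merged = concat(first, second);
console.log(merged.content); // "Hello world"
// Expected (assuming usage_metadata is summed on merge):
// { input_tokens: 12, output_tokens: 5, total_tokens: 17 }
console.log(merged.usage_metadata);
```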

src/packages/server/llm/custom-openai.ts

Lines changed: 44 additions & 9 deletions
```diff
@@ -4,11 +4,13 @@ import {
   isCustomOpenAI,
 } from "@cocalc/util/db-schema/llm-utils";
 import type { ChatOutput, History, Stream } from "@cocalc/util/types/llm";
+import { AIMessageChunk } from "@langchain/core/messages";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
+import { concat } from "@langchain/core/utils/stream";
 import {
   ChatOpenAI as ChatOpenAILC,
   OpenAICallOptions,
@@ -75,6 +77,7 @@ export async function evaluateCustomOpenAI(

   const chunks = await chainWithHistory.stream({ input });

+  let finalResult: AIMessageChunk | undefined;
   let output = "";
   for await (const chunk of chunks) {
     const { content } = chunk;
@@ -83,19 +86,51 @@
     }
     output += content;
     opts.stream?.(content);
+
+    // Collect the final result to check for usage metadata
+    if (finalResult) {
+      finalResult = concat(finalResult, chunk);
+    } else {
+      finalResult = chunk;
+    }
   }

   // and an empty call when done
   opts.stream?.(null);

-  // we use that GPT3 tokenizer to get an approximate number of tokens
-  const prompt_tokens = numTokens(input) + historyTokens;
-  const completion_tokens = numTokens(output);
+  // Check for usage metadata from LangChain first (more accurate)
+  const usage_metadata = finalResult?.usage_metadata;
+  log.debug("usage_metadata", usage_metadata);
+
+  if (usage_metadata) {
+    const { input_tokens, output_tokens, total_tokens } = usage_metadata;
+    log.debug("evaluateCustomOpenAI successful (using usage_metadata)", {
+      input_tokens,
+      output_tokens,
+      total_tokens,
+    });
+
+    return {
+      output,
+      total_tokens,
+      completion_tokens: output_tokens,
+      prompt_tokens: input_tokens,
+    };
+  } else {
+    // Fallback to manual token counting (approximation using GPT-3 tokenizer)
+    const prompt_tokens = numTokens(input) + historyTokens;
+    const completion_tokens = numTokens(output);

-  return {
-    output,
-    total_tokens: prompt_tokens + completion_tokens,
-    completion_tokens,
-    prompt_tokens,
-  };
+    log.debug("evaluateCustomOpenAI successful (using manual counting)", {
+      prompt_tokens,
+      completion_tokens,
+    });
+
+    return {
+      output,
+      total_tokens: prompt_tokens + completion_tokens,
+      completion_tokens,
+      prompt_tokens,
+    };
+  }
 }
```

src/packages/server/llm/google-genai-client.ts

Lines changed: 0 additions & 1 deletion
```diff
@@ -163,7 +163,6 @@ export class GoogleGenAIClient {
     // Check for usage metadata from LangChain first (more accurate, includes thinking tokens)
     const usage_metadata = finalResult?.usage_metadata;
     log.debug("usage_metadata", usage_metadata);
-    console.log("usage_metadata", usage_metadata);

     if (usage_metadata) {
       const { input_tokens, output_tokens, total_tokens } = usage_metadata;
```

src/packages/server/llm/google-lc.ts

Lines changed: 45 additions & 10 deletions
```diff
@@ -1,8 +1,10 @@
+import { AIMessageChunk } from "@langchain/core/messages";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
+import { concat } from "@langchain/core/utils/stream";

 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -94,24 +96,57 @@ export async function evaluateGoogleGenAILC(

   const chunks = await chainWithHistory.stream({ input });

+  let finalResult: AIMessageChunk | undefined;
   let output = "";
   for await (const chunk of chunks) {
     const { content } = chunk;
     if (typeof content !== "string") continue;
     output += content;
     opts.stream?.(content);
+
+    // Collect the final result to check for usage metadata
+    if (finalResult) {
+      finalResult = concat(finalResult, chunk);
+    } else {
+      finalResult = chunk;
+    }
   }

   opts.stream?.(null);

-  // we use that GPT3 tokenizer to get an approximate number of tokens
-  const prompt_tokens = numTokens(input) + historyTokens;
-  const completion_tokens = numTokens(output);
-
-  return {
-    output,
-    total_tokens: prompt_tokens + completion_tokens,
-    completion_tokens,
-    prompt_tokens,
-  };
+  // Check for usage metadata from LangChain first (more accurate)
+  const usage_metadata = finalResult?.usage_metadata;
+  log.debug("usage_metadata", usage_metadata);
+
+  if (usage_metadata) {
+    const { input_tokens, output_tokens, total_tokens } = usage_metadata;
+    log.debug("evaluateGoogleGenAILC successful (using usage_metadata)", {
+      input_tokens,
+      output_tokens,
+      total_tokens,
+    });
+
+    return {
+      output,
+      total_tokens,
+      completion_tokens: output_tokens,
+      prompt_tokens: input_tokens,
+    };
+  } else {
+    // Fallback to manual token counting (approximation using GPT-3 tokenizer)
+    const prompt_tokens = numTokens(input) + historyTokens;
+    const completion_tokens = numTokens(output);
+
+    log.debug("evaluateGoogleGenAILC successful (using manual counting)", {
+      prompt_tokens,
+      completion_tokens,
+    });
+
+    return {
+      output,
+      total_tokens: prompt_tokens + completion_tokens,
+      completion_tokens,
+      prompt_tokens,
+    };
+  }
 }
```

src/packages/server/llm/mistral.ts

Lines changed: 45 additions & 10 deletions
```diff
@@ -1,8 +1,10 @@
+import { AIMessageChunk } from "@langchain/core/messages";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
 import { RunnableWithMessageHistory } from "@langchain/core/runnables";
+import { concat } from "@langchain/core/utils/stream";
 import { ChatMistralAI, ChatMistralAIInput } from "@langchain/mistralai";
 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -86,24 +88,57 @@ export async function evaluateMistral(

   const chunks = await chainWithHistory.stream({ input });

+  let finalResult: AIMessageChunk | undefined;
   let output = "";
   for await (const chunk of chunks) {
     const { content } = chunk;
     if (typeof content !== "string") continue;
     output += content;
     opts.stream?.(content);
+
+    // Collect the final result to check for usage metadata
+    if (finalResult) {
+      finalResult = concat(finalResult, chunk);
+    } else {
+      finalResult = chunk;
+    }
   }

   opts.stream?.(null);

-  // we use that GPT3 tokenizer to get an approximate number of tokens
-  const prompt_tokens = numTokens(input) + historyTokens;
-  const completion_tokens = numTokens(output);
-
-  return {
-    output,
-    total_tokens: prompt_tokens + completion_tokens,
-    completion_tokens,
-    prompt_tokens,
-  };
+  // Check for usage metadata from LangChain first (more accurate)
+  const usage_metadata = finalResult?.usage_metadata;
+  log.debug("usage_metadata", usage_metadata);
+
+  if (usage_metadata) {
+    const { input_tokens, output_tokens, total_tokens } = usage_metadata;
+    log.debug("evaluateMistral successful (using usage_metadata)", {
+      input_tokens,
+      output_tokens,
+      total_tokens,
+    });
+
+    return {
+      output,
+      total_tokens,
+      completion_tokens: output_tokens,
+      prompt_tokens: input_tokens,
+    };
+  } else {
+    // Fallback to manual token counting (approximation using GPT-3 tokenizer)
+    const prompt_tokens = numTokens(input) + historyTokens;
+    const completion_tokens = numTokens(output);
+
+    log.debug("evaluateMistral successful (using manual counting)", {
+      prompt_tokens,
+      completion_tokens,
+    });
+
+    return {
+      output,
+      total_tokens: prompt_tokens + completion_tokens,
+      completion_tokens,
+      prompt_tokens,
+    };
+  }
 }
```
