Skip to content

Commit 1c01445

Browse files
committed
server/llm: revamp counting tokens for google genai
1 parent 27bdd94 commit 1c01445

File tree

2 files changed

+66
-21
lines changed

2 files changed

+66
-21
lines changed

src/packages/server/llm/google-genai-client.ts

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
*/
66

77
import { GenerativeModel, GoogleGenerativeAI } from "@google/generative-ai";
8+
import { AIMessageChunk } from "@langchain/core/messages";
89
import {
910
ChatPromptTemplate,
1011
MessagesPlaceholder,
1112
} from "@langchain/core/prompts";
1213
import { RunnableWithMessageHistory } from "@langchain/core/runnables";
14+
import { concat } from "@langchain/core/utils/stream";
1315
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
1416
import getLogger from "@cocalc/backend/logger";
1517
import { getServerSettings } from "@cocalc/database/settings";
@@ -117,10 +119,7 @@ export class GoogleGenAIClient {
117119
streaming: true,
118120
});
119121

120-
// However, we also count tokens, and for that we use "gemini-1.5-pro" only
121-
const geminiPro: GenerativeModel = this.genAI.getGenerativeModel({
122-
model: "gemini-1.5-pro",
123-
});
122+
// Token counting will be done using either usage_metadata or the actual model
124123

125124
const prompt = ChatPromptTemplate.fromMessages([
126125
["system", system ?? ""],
@@ -143,33 +142,75 @@ export class GoogleGenAIClient {
143142

144143
const chunks = await chainWithHistory.stream({ input });
145144

145+
let finalResult: AIMessageChunk | undefined;
146146
let output = "";
147147
for await (const chunk of chunks) {
148148
const { content } = chunk;
149149
if (typeof content !== "string") continue;
150150
output += content;
151151
stream?.(content);
152+
153+
// Collect the final result to check for usage metadata
154+
if (finalResult) {
155+
finalResult = concat(finalResult, chunk);
156+
} else {
157+
finalResult = chunk;
158+
}
152159
}
153160

154161
stream?.(null);
155162

156-
const { totalTokens: prompt_tokens } = await geminiPro.countTokens([
157-
input,
158-
system ?? "",
159-
...history.map(({ content }) => content),
160-
]);
161-
162-
const { totalTokens: completion_tokens } = await geminiPro.countTokens(
163-
output,
164-
);
163+
// Check for usage metadata from LangChain first (more accurate, includes thinking tokens)
164+
const usage_metadata = finalResult?.usage_metadata;
165+
log.debug("usage_metadata", usage_metadata);
166+
console.log("usage_metadata", usage_metadata);
167+
168+
if (usage_metadata) {
169+
const { input_tokens, output_tokens, total_tokens } = usage_metadata;
170+
log.debug("chatGemini successful (using usage_metadata)", {
171+
input_tokens,
172+
output_tokens,
173+
total_tokens,
174+
usage_metadata, // Log full metadata to see what other fields might be available
175+
});
165176

166-
log.debug("chatGemini successful", { prompt_tokens, completion_tokens });
177+
// For now, return the standard ChatOutput format
178+
// TODO: Consider extending ChatOutput interface to include thinking_tokens if available
179+
return {
180+
output,
181+
total_tokens,
182+
completion_tokens: output_tokens,
183+
prompt_tokens: input_tokens,
184+
};
185+
} else {
186+
// Fallback to manual token counting using the actual model (not hardcoded)
187+
const tokenCountingModel: GenerativeModel = this.genAI.getGenerativeModel(
188+
{
189+
model: modelName,
190+
},
191+
);
192+
193+
const { totalTokens: prompt_tokens } =
194+
await tokenCountingModel.countTokens([
195+
input,
196+
system ?? "",
197+
...history.map(({ content }) => content),
198+
]);
199+
200+
const { totalTokens: completion_tokens } =
201+
await tokenCountingModel.countTokens(output);
202+
203+
log.debug("chatGemini successful (using manual counting)", {
204+
prompt_tokens,
205+
completion_tokens,
206+
});
167207

168-
return {
169-
output,
170-
total_tokens: prompt_tokens + completion_tokens,
171-
completion_tokens,
172-
prompt_tokens,
173-
};
208+
return {
209+
output,
210+
total_tokens: prompt_tokens + completion_tokens,
211+
completion_tokens,
212+
prompt_tokens,
213+
};
214+
}
174215
}
175216
}

src/packages/server/llm/test/models.test.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,11 @@ const QUERY = {
3838
function checkAnswer(answer) {
3939
const { output, total_tokens, completion_tokens, prompt_tokens } = answer;
4040
expect(output).toContain("100");
41-
expect(total_tokens).toEqual(prompt_tokens + completion_tokens);
41+
// total tokens is more than that sum for "thinking" models like gemini 2.5
42+
// because thinking tokens are not included in prompt_tokens + completion_tokens
43+
expect(total_tokens).toBeGreaterThanOrEqual(
44+
prompt_tokens + completion_tokens,
45+
);
4246
expect(prompt_tokens).toBeGreaterThan(5);
4347
expect(completion_tokens).toBeGreaterThan(0);
4448
}

0 commit comments

Comments
 (0)