5
5
*/
6
6
7
7
import { GenerativeModel , GoogleGenerativeAI } from "@google/generative-ai" ;
8
+ import { AIMessageChunk } from "@langchain/core/messages" ;
8
9
import {
9
10
ChatPromptTemplate ,
10
11
MessagesPlaceholder ,
11
12
} from "@langchain/core/prompts" ;
12
13
import { RunnableWithMessageHistory } from "@langchain/core/runnables" ;
14
+ import { concat } from "@langchain/core/utils/stream" ;
13
15
import { ChatGoogleGenerativeAI } from "@langchain/google-genai" ;
14
16
import getLogger from "@cocalc/backend/logger" ;
15
17
import { getServerSettings } from "@cocalc/database/settings" ;
@@ -117,10 +119,7 @@ export class GoogleGenAIClient {
117
119
streaming : true ,
118
120
} ) ;
119
121
120
- // However, we also count tokens, and for that we use "gemini-1.5-pro" only
121
- const geminiPro : GenerativeModel = this . genAI . getGenerativeModel ( {
122
- model : "gemini-1.5-pro" ,
123
- } ) ;
122
+ // Token counting prefers usage_metadata from the streamed response; if it is absent, we fall back to countTokens on the actual model
124
123
125
124
const prompt = ChatPromptTemplate . fromMessages ( [
126
125
[ "system" , system ?? "" ] ,
@@ -143,33 +142,75 @@ export class GoogleGenAIClient {
143
142
144
143
const chunks = await chainWithHistory . stream ( { input } ) ;
145
144
145
+ let finalResult : AIMessageChunk | undefined ;
146
146
let output = "" ;
147
147
for await ( const chunk of chunks ) {
148
148
const { content } = chunk ;
149
149
if ( typeof content !== "string" ) continue ;
150
150
output += content ;
151
151
stream ?.( content ) ;
152
+
153
+ // Collect the final result to check for usage metadata
154
+ if ( finalResult ) {
155
+ finalResult = concat ( finalResult , chunk ) ;
156
+ } else {
157
+ finalResult = chunk ;
158
+ }
152
159
}
153
160
154
161
stream ?.( null ) ;
155
162
156
- const { totalTokens : prompt_tokens } = await geminiPro . countTokens ( [
157
- input ,
158
- system ?? "" ,
159
- ...history . map ( ( { content } ) => content ) ,
160
- ] ) ;
161
-
162
- const { totalTokens : completion_tokens } = await geminiPro . countTokens (
163
- output ,
164
- ) ;
163
+ // Check for usage metadata from LangChain first (more accurate, includes thinking tokens)
164
+ const usage_metadata = finalResult ?. usage_metadata ;
165
+ log . debug ( "usage_metadata" , usage_metadata ) ;
166
+ console . log ( "usage_metadata" , usage_metadata ) ;
167
+
168
+ if ( usage_metadata ) {
169
+ const { input_tokens, output_tokens, total_tokens } = usage_metadata ;
170
+ log . debug ( "chatGemini successful (using usage_metadata)" , {
171
+ input_tokens,
172
+ output_tokens,
173
+ total_tokens,
174
+ usage_metadata, // Log full metadata to see what other fields might be available
175
+ } ) ;
165
176
166
- log . debug ( "chatGemini successful" , { prompt_tokens, completion_tokens } ) ;
177
+ // For now, return the standard ChatOutput format
178
+ // TODO: Consider extending ChatOutput interface to include thinking_tokens if available
179
+ return {
180
+ output,
181
+ total_tokens,
182
+ completion_tokens : output_tokens ,
183
+ prompt_tokens : input_tokens ,
184
+ } ;
185
+ } else {
186
+ // Fall back to manual token counting using the actual model (not hardcoded)
187
+ const tokenCountingModel : GenerativeModel = this . genAI . getGenerativeModel (
188
+ {
189
+ model : modelName ,
190
+ } ,
191
+ ) ;
192
+
193
+ const { totalTokens : prompt_tokens } =
194
+ await tokenCountingModel . countTokens ( [
195
+ input ,
196
+ system ?? "" ,
197
+ ...history . map ( ( { content } ) => content ) ,
198
+ ] ) ;
199
+
200
+ const { totalTokens : completion_tokens } =
201
+ await tokenCountingModel . countTokens ( output ) ;
202
+
203
+ log . debug ( "chatGemini successful (using manual counting)" , {
204
+ prompt_tokens,
205
+ completion_tokens,
206
+ } ) ;
167
207
168
- return {
169
- output,
170
- total_tokens : prompt_tokens + completion_tokens ,
171
- completion_tokens,
172
- prompt_tokens,
173
- } ;
208
+ return {
209
+ output,
210
+ total_tokens : prompt_tokens + completion_tokens ,
211
+ completion_tokens,
212
+ prompt_tokens,
213
+ } ;
214
+ }
174
215
}
175
216
}
0 commit comments