@@ -21,7 +21,6 @@ import {
   isOpenAIModel,
 } from "@cocalc/util/db-schema/llm-utils";
 import type { ChatOutput, History, Stream } from "@cocalc/util/types/llm";
-import { GoogleGenerativeAI } from "@google/generative-ai";
 import { ChatAnthropic } from "@langchain/anthropic";
 import { AIMessageChunk } from "@langchain/core/messages";
 import {
@@ -60,18 +59,15 @@ export interface LLMProviderConfig {
   createClient: (
     options: LLMEvaluationOptions,
     settings: ServerSettings,
+    mode: "cocalc" | "user",
   ) => Promise<any>;
 
   // Model processing
-  processModel?: (model: string) => string;
+  canonicalModel?: (model: string) => string;
 
   // Special handling flags
-  supportsStreaming?: (model: string) => boolean;
   getSystemRole?: (model: string) => string;
 
-  // Content processing
-  shouldContinueOnNonString?: boolean;
-
   // Token counting fallback
   getTokenCountFallback?: (
     input: string,
@@ -82,6 +78,10 @@ export interface LLMProviderConfig {
   ) => Promise<{ prompt_tokens: number; completion_tokens: number }>;
 }
 
+function isO1Model(normalizedModel) {
+  return normalizedModel === "o1" || normalizedModel === "o1-mini";
+}
+
 // Provider configurations
 export const PROVIDER_CONFIGS = {
   openai: {
@@ -95,23 +95,20 @@ export const PROVIDER_CONFIGS = {
       );
 
       // Check if it's O1 model (doesn't support streaming)
-      const isO1Model = normalizedModel.startsWith("o1-");
-
+      const isO1 = isO1Model(normalizedModel);
       return new ChatOpenAI({
         model: normalizedModel,
         apiKey: options.apiKey || apiKey,
         maxTokens: options.maxTokens,
-        streaming: options.stream != null && !isO1Model,
+        streaming: options.stream != null && !isO1,
         streamUsage: true,
-        ...(options.stream != null && !isO1Model
+        ...(options.stream != null && !isO1
           ? { streamOptions: { includeUsage: true } }
           : {}),
       });
     },
-    supportsStreaming: (model) =>
-      !normalizeOpenAIModel(model).startsWith("o1-"),
+    canonicalModel: (model) => normalizeOpenAIModel(model),
     getSystemRole: (_model) => "system",
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -120,10 +117,13 @@ export const PROVIDER_CONFIGS = {
 
   google: {
     name: "Google GenAI",
-    createClient: async (options, settings) => {
-      const { google_vertexai_key: apiKey } = settings;
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.google_vertexai_key : options.apiKey;
       const modelName =
-        GOOGLE_MODEL_TO_ID[options.model as GoogleModel] ?? options.model;
+        mode === "cocalc"
+          ? GOOGLE_MODEL_TO_ID[options.model as GoogleModel] ?? options.model
+          : options.model;
 
       log.debug(
         `Google createClient: original=${options.model}, modelName=${modelName}`,
@@ -140,67 +140,47 @@ export const PROVIDER_CONFIGS = {
         streaming: true,
       });
     },
-    processModel: (model) => GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model,
-    shouldContinueOnNonString: true,
-    getTokenCountFallback: async (
-      input,
-      output,
-      historyTokens,
-      model,
-      settings,
-    ) => {
-      const { google_vertexai_key: apiKey } = settings;
-      const modelName = GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model;
-
-      const genAI = new GoogleGenerativeAI(apiKey);
-      const tokenCountingModel = genAI.getGenerativeModel({ model: modelName });
-
-      const { totalTokens: prompt_tokens } =
-        await tokenCountingModel.countTokens([
-          input,
-          // Use historyTokens instead of recalculating
-        ]);
-
-      const { totalTokens: completion_tokens } =
-        await tokenCountingModel.countTokens(output);
-
-      return {
-        prompt_tokens: prompt_tokens + historyTokens,
-        completion_tokens,
-      };
-    },
+    canonicalModel: (model) =>
+      GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model,
+    getTokenCountFallback: async (input, output, historyTokens) => ({
+      prompt_tokens: numTokens(input) + historyTokens,
+      completion_tokens: numTokens(output),
+    }),
   },
 
   anthropic: {
     name: "Anthropic",
-    createClient: async (options, settings) => {
-      const { anthropic_api_key: apiKey } = settings;
-      const modelVersion = ANTHROPIC_VERSION[options.model as AnthropicModel];
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.anthropic_api_key : options.apiKey;
+      const modelName =
+        mode === "cocalc"
+          ? ANTHROPIC_VERSION[options.model as AnthropicModel]
+          : options.model;
 
-      if (modelVersion == null) {
+      if (modelName == null) {
         throw new Error(
           `Anthropic model ${options.model} is no longer supported`,
         );
       }
 
       log.debug(
-        `Anthropic createClient: original=${options.model}, modelVersion=${modelVersion}`,
+        `Anthropic createClient: original=${options.model}, modelVersion=${modelName}`,
       );
 
       return new ChatAnthropic({
-        model: modelVersion,
-        apiKey: options.apiKey || apiKey,
+        model: modelName,
+        apiKey,
         maxTokens: options.maxTokens,
       });
     },
-    processModel: (model) => {
+    canonicalModel: (model) => {
       const version = ANTHROPIC_VERSION[model as AnthropicModel];
       if (version == null) {
        throw new Error(`Anthropic model ${model} is no longer supported`);
       }
       return version;
     },
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -209,17 +189,17 @@ export const PROVIDER_CONFIGS = {
 
   mistral: {
     name: "Mistral",
-    createClient: async (options, settings) => {
-      const { mistral_api_key: apiKey } = settings;
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.mistral_api_key : options.apiKey;
 
       log.debug(`Mistral createClient: model=${options.model}`);
 
       return new ChatMistralAI({
         model: options.model,
-        apiKey: options.apiKey || apiKey,
+        apiKey,
       });
     },
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -235,8 +215,7 @@ export const PROVIDER_CONFIGS = {
       );
       return await getCustomOpenAI(transformedModel);
     },
-    processModel: (model) => fromCustomOpenAIModel(model),
-    shouldContinueOnNonString: false, // breaks on non-string content
+    canonicalModel: (model) => fromCustomOpenAIModel(model),
    getTokenCountFallback: async (input, output, historyTokens) => ({
      prompt_tokens: numTokens(input) + historyTokens,
      completion_tokens: numTokens(output),
@@ -269,20 +248,17 @@ function content2string(content: any): string {
     const output0 = content[0];
     if (output0?.type === "text") {
       return output0.text;
-    } else {
-      log.debug("content2string unable to process", content);
-      return "";
     }
-  } else {
-    log.debug("content2string unable to process", content);
-    return "";
   }
+
+  log.debug("content2string unable to process", content);
+  return "";
 }
 
 // Main unified evaluation function
 export async function evaluateWithLangChain(
   options: LLMEvaluationOptions,
-  _mode: "cocalc" | "user" = "cocalc",
+  mode: "cocalc" | "user" = "cocalc",
 ): Promise<ChatOutput> {
   const { input, system, history = [], model, stream, maxTokens } = options;
@@ -302,10 +278,12 @@ export async function evaluateWithLangChain(
   const settings = await getServerSettings();
 
   // Create LangChain client
-  const client = await config.createClient(options, settings);
+  const client = await config.createClient(options, settings, mode);
 
-  // Process model name if needed (processed model not currently used in this function)
-  // const _processedModel = config.processModel ? config.processModel(model) : model;
+  // Canonical model name
+  const canonicalModel = config.canonicalModel
+    ? config.canonicalModel(model)
+    : model;
 
   // Determine system role (always use "history" for historyKey)
   const systemRole = config.getSystemRole
@@ -316,8 +294,8 @@ export async function evaluateWithLangChain(
 
   // Create prompt template
   // For o1 models, omit the system message entirely since they don't support system roles
-  const isO1Model = model.includes("o1");
-  const prompt = isO1Model
+  const isO1 = isO1Model(canonicalModel);
+  const prompt = isO1
     ? ChatPromptTemplate.fromMessages([
         new MessagesPlaceholder(historyMessagesKey),
         ["human", system ? `${system}\n\n{input}` : "{input}"],
@@ -347,15 +325,10 @@ export async function evaluateWithLangChain(
     },
   });
 
-  // Handle streaming vs non-streaming
-  const supportsStreaming = config.supportsStreaming
-    ? config.supportsStreaming(model)
-    : true;
-
   let finalResult: AIMessageChunk | undefined;
   let output = "";
 
-  if (stream && supportsStreaming) {
+  if (stream) {
     // Streaming mode
     const chunks = await chainWithHistory.stream({ input });
@@ -364,10 +337,6 @@ export async function evaluateWithLangChain(
       const { content } = chunkTyped;
       const contentStr = content2string(content);
 
-      if (typeof content !== "string" && !config.shouldContinueOnNonString) {
-        break;
-      }
-
      if (typeof content === "string") {
        output += content;
        stream(content);
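
For orientation, here is a minimal sketch of how the refactored entry point might be called once this change lands. It is not part of the commit: the import path, model name, and environment variable are illustrative assumptions, and the options shape (`input`, `system`, `history`, `model`, `maxTokens`, `apiKey`, `stream`) is only inferred from the fields the diff reads off `LLMEvaluationOptions`.

// Hypothetical usage sketch; names below are assumptions, not part of the commit.
import { evaluateWithLangChain } from "./evaluate-lc"; // assumed module path

async function demo() {
  // In "user" mode the provider configs read options.apiKey instead of the
  // server-wide key from ServerSettings, per the createClient changes above.
  const result = await evaluateWithLangChain(
    {
      model: "gpt-4o", // illustrative model name
      input: "Summarize this notebook.",
      system: "You are a helpful assistant.",
      history: [],
      maxTokens: 512,
      apiKey: process.env.MY_LLM_KEY ?? "", // user-supplied key (hypothetical env var)
      stream: (token: string) => process.stdout.write(token), // receives streamed chunks
    },
    "user",
  );
  console.log(result);
}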