@@ -11,14 +11,15 @@ import {
   isMistralModel,
   isOpenAIModel,
 } from "@cocalc/util/db-schema/llm-utils";
-// import { evaluateMistral } from "../mistral";
+import { evaluateGoogleGenAI } from "..";
 import { evaluateAnthropic } from "../anthropic";
+import { getClient } from "../client";
+import { evaluateWithLangChain } from "../evaluate-lc";
 import { GoogleGenAIClient } from "../google-genai-client";
+import { USE_NEWER_LC_IMPL } from "../index";
 import { evaluateMistral } from "../mistral";
 import { evaluateOpenAILC } from "../openai-lc";
 import { enableModels, setupAPIKeys, test_llm } from "./shared";
-import { evaluateGoogleGenAI } from "..";
-import { getClient } from "../client";
 
 const LLM_TIMEOUT = 10_000;
 
@@ -54,10 +55,15 @@ async function llmOpenAI(model: LanguageModelCore) {
     throw new Error(`model: ${model} is not an OpenAI model`);
   }
 
-  const answer = await evaluateOpenAILC({
-    model,
-    ...QUERY,
-  });
+  const answer = USE_NEWER_LC_IMPL
+    ? await evaluateWithLangChain({
+        model,
+        ...QUERY,
+      })
+    : await evaluateOpenAILC({
+        model,
+        ...QUERY,
+      });
 
   checkAnswer(answer);
 }
@@ -66,12 +72,21 @@ async function llmGoogle(model: LanguageModelCore) {
   if (!isGoogleModel(model)) {
     throw new Error(`model: ${model} is not a Google model`);
   }
-  const client = (await getClient(model)) as GoogleGenAIClient;
-  const answer = await evaluateGoogleGenAI({
-    model,
-    client,
-    ...QUERY,
-  });
+
+  const answer = USE_NEWER_LC_IMPL
+    ? await evaluateWithLangChain({
+        model,
+        ...QUERY,
+      })
+    : await (async () => {
+        const client = (await getClient(model)) as GoogleGenAIClient;
+        return await evaluateGoogleGenAI({
+          model,
+          client,
+          ...QUERY,
+        });
+      })();
+
   checkAnswer(answer);
 }
 
@@ -80,95 +95,96 @@ test_llm("openai")("OpenAI", () => {
   test(
     "gpt3.5 works",
     async () => {
-      llmOpenAI("gpt-3.5-turbo");
+      await llmOpenAI("gpt-3.5-turbo");
     },
     LLM_TIMEOUT,
   );
   test(
     "gpt 4 works",
     async () => {
-      llmOpenAI("gpt-4");
+      await llmOpenAI("gpt-4");
     },
     LLM_TIMEOUT,
   );
   test(
     "gpt 4 turbo works",
     async () => {
-      llmOpenAI("gpt-4-turbo-8k");
+      await llmOpenAI("gpt-4-turbo-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gpt 4 omni works",
     async () => {
-      llmOpenAI("gpt-4o-8k");
+      await llmOpenAI("gpt-4o-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gpt 4o mini works",
     async () => {
-      llmOpenAI("gpt-4o-mini-8k");
+      await llmOpenAI("gpt-4o-mini-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gpt 4.1 works",
     async () => {
-      llmOpenAI("gpt-4.1");
+      await llmOpenAI("gpt-4.1");
     },
     LLM_TIMEOUT,
   );
   test(
-    "gpt 4.1 mini works",
+    "openai 4.1 mini works",
     async () => {
-      llmOpenAI("gpt-4.1-mini");
+      await llmOpenAI("gpt-4.1-mini");
     },
     LLM_TIMEOUT,
   );
 
-  // test("gpt o1", async () => {
-  //   llmOpenAI("o1-8k");
-  // });
-  // test("gpt o1 mini works", async () => {
-  //   llmOpenAI("o1-mini-8k");
-  // });
+  test("openai o1", async () => {
+    await llmOpenAI("o1-8k");
+  });
+
+  test("gpt o1 mini works", async () => {
+    await llmOpenAI("o1-mini-8k");
+  });
 });
 
 // ATTN: does not work everywhere around, geolocation matters
 test_llm("google")("Google GenAI", () => {
   test(
     "gemini 1.5 pro works",
     async () => {
-      llmGoogle("gemini-1.5-pro");
+      await llmGoogle("gemini-1.5-pro");
     },
     LLM_TIMEOUT,
   );
   test(
     "gemini 2.0 flash works",
     async () => {
-      llmGoogle("gemini-2.0-flash-8k");
+      await llmGoogle("gemini-2.0-flash-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gemini 2.0 flash lite works",
     async () => {
-      llmGoogle("gemini-2.0-flash-lite-8k");
+      await llmGoogle("gemini-2.0-flash-lite-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gemini 2.5 flash works",
     async () => {
-      llmGoogle("gemini-2.5-flash-8k");
+      await llmGoogle("gemini-2.5-flash-8k");
     },
     LLM_TIMEOUT,
   );
   test(
     "gemini 2.5 pro works",
     async () => {
-      llmGoogle("gemini-2.5-pro-8k");
+      await llmGoogle("gemini-2.5-pro-8k");
     },
     LLM_TIMEOUT,
   );
@@ -188,7 +204,9 @@ test_llm("mistralai")("Mistral AI", () => {
   test(
     "small",
     async () => {
-      const answer = await evaluateMistral({ model: small, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: small, ...QUERY })
+        : await evaluateMistral({ model: small, ...QUERY });
       checkAnswer(answer);
     },
     LLM_TIMEOUT,
@@ -197,7 +215,9 @@ test_llm("mistralai")("Mistral AI", () => {
   test(
     "medium",
     async () => {
-      const answer = await evaluateMistral({ model: medium, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: medium, ...QUERY })
+        : await evaluateMistral({ model: medium, ...QUERY });
       checkAnswer(answer);
     },
     LLM_TIMEOUT,
@@ -206,7 +226,9 @@ test_llm("mistralai")("Mistral AI", () => {
   test(
     "large",
     async () => {
-      const answer = await evaluateMistral({ model: large, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: large, ...QUERY })
+        : await evaluateMistral({ model: large, ...QUERY });
       checkAnswer(answer);
     },
     LLM_TIMEOUT,
@@ -227,7 +249,9 @@ test_llm("anthropic")("Anthropic", () => {
   test(
     "haiku",
     async () => {
-      const answer = await evaluateAnthropic({ model: haiku, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: haiku, ...QUERY })
+        : await evaluateAnthropic({ model: haiku, ...QUERY });
       checkAnswer(answer);
     },
     LLM_TIMEOUT,
@@ -236,7 +260,9 @@ test_llm("anthropic")("Anthropic", () => {
   test(
     "sonnet",
     async () => {
-      const answer = await evaluateAnthropic({ model: sonnet, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: sonnet, ...QUERY })
+        : await evaluateAnthropic({ model: sonnet, ...QUERY });
       checkAnswer(answer);
     },
     LLM_TIMEOUT,
@@ -245,7 +271,9 @@ test_llm("anthropic")("Anthropic", () => {
   test(
     "opus",
     async () => {
-      const answer = await evaluateAnthropic({ model: opus, ...QUERY });
+      const answer = USE_NEWER_LC_IMPL
+        ? await evaluateWithLangChain({ model: opus, ...QUERY })
+        : await evaluateAnthropic({ model: opus, ...QUERY });
      checkAnswer(answer);
     },
     LLM_TIMEOUT,
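A note on the pattern in this commit: every migrated call site repeats the same ternary on USE_NEWER_LC_IMPL, preferring the unified evaluateWithLangChain path and falling back to the provider-specific evaluator. A minimal sketch of how that toggle could be hoisted into one helper (hypothetical, not part of the commit; it assumes all evaluators accept the { model, ...QUERY } shape used above and resolve to the same string answer type):

// Hypothetical helper, not in the commit: centralizes the
// USE_NEWER_LC_IMPL toggle so each test doesn't repeat the ternary.
// Assumes QUERY is the shared query object defined in this file and
// that the legacy evaluator resolves to a plain string answer.
async function evaluateWithFlag(
  model: LanguageModelCore,
  legacy: (opts: { model: LanguageModelCore } & typeof QUERY) => Promise<string>,
): Promise<string> {
  return USE_NEWER_LC_IMPL
    ? await evaluateWithLangChain({ model, ...QUERY })
    : await legacy({ model, ...QUERY });
}

With such a helper, each Mistral or Anthropic test body would shrink to something like checkAnswer(await evaluateWithFlag(small, evaluateMistral)).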