
Commit 4b88bb5

server/llm: further tweak generalized langchain processing
1 parent 491cc9c · commit 4b88bb5

File tree: 6 files changed, +112 -90 lines

src/packages/frontend/admin/llm/admin-llm-test.tsx

Lines changed: 3 additions & 0 deletions

@@ -113,6 +113,7 @@ export function TestLLMAdmin() {
       let reply = "";
 
       llmStream.on("token", (token) => {
+        console.log({ model, system, token });
         if (token != null) {
           reply += token;
           // Update the result in real-time
@@ -133,6 +134,7 @@ export function TestLLMAdmin() {
       });
 
       llmStream.on("error", (err) => {
+        console.error(`Error in LLM stream for model ${model}:`, err);
         resolve({
           model,
           status: "failed",
@@ -144,6 +146,7 @@ export function TestLLMAdmin() {
       // Start the stream
       llmStream.emit("start");
     } catch (err) {
+      console.error(`Error running test for model ${model}:`, err);
       resolve({
         model,
         status: "failed",

src/packages/server/llm/evaluate-lc.ts

Lines changed: 51 additions & 82 deletions

@@ -21,7 +21,6 @@ import {
   isOpenAIModel,
 } from "@cocalc/util/db-schema/llm-utils";
 import type { ChatOutput, History, Stream } from "@cocalc/util/types/llm";
-import { GoogleGenerativeAI } from "@google/generative-ai";
 import { ChatAnthropic } from "@langchain/anthropic";
 import { AIMessageChunk } from "@langchain/core/messages";
 import {
@@ -60,18 +59,15 @@ export interface LLMProviderConfig {
   createClient: (
     options: LLMEvaluationOptions,
     settings: ServerSettings,
+    mode: "cocalc" | "user",
   ) => Promise<any>;
 
   // Model processing
-  processModel?: (model: string) => string;
+  canonicalModel?: (model: string) => string;
 
   // Special handling flags
-  supportsStreaming?: (model: string) => boolean;
   getSystemRole?: (model: string) => string;
 
-  // Content processing
-  shouldContinueOnNonString?: boolean;
-
   // Token counting fallback
   getTokenCountFallback?: (
     input: string,
@@ -82,6 +78,10 @@
   ) => Promise<{ prompt_tokens: number; completion_tokens: number }>;
 }
 
+function isO1Model(normalizedModel) {
+  return normalizedModel === "o1" || normalizedModel === "o1-mini";
+}
+
 // Provider configurations
 export const PROVIDER_CONFIGS = {
   openai: {
@@ -95,23 +95,20 @@ export const PROVIDER_CONFIGS = {
       );
 
       // Check if it's O1 model (doesn't support streaming)
-      const isO1Model = normalizedModel.startsWith("o1-");
-
+      const isO1 = isO1Model(normalizedModel);
       return new ChatOpenAI({
         model: normalizedModel,
         apiKey: options.apiKey || apiKey,
         maxTokens: options.maxTokens,
-        streaming: options.stream != null && !isO1Model,
+        streaming: options.stream != null && !isO1,
         streamUsage: true,
-        ...(options.stream != null && !isO1Model
+        ...(options.stream != null && !isO1
          ? { streamOptions: { includeUsage: true } }
          : {}),
       });
     },
-    supportsStreaming: (model) =>
-      !normalizeOpenAIModel(model).startsWith("o1-"),
+    canonicalModel: (model) => normalizeOpenAIModel(model),
     getSystemRole: (_model) => "system",
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -120,10 +117,13 @@ export const PROVIDER_CONFIGS = {
 
   google: {
     name: "Google GenAI",
-    createClient: async (options, settings) => {
-      const { google_vertexai_key: apiKey } = settings;
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.google_vertexai_key : options.apiKey;
       const modelName =
-        GOOGLE_MODEL_TO_ID[options.model as GoogleModel] ?? options.model;
+        mode === "cocalc"
+          ? GOOGLE_MODEL_TO_ID[options.model as GoogleModel] ?? options.model
+          : options.model;
 
       log.debug(
         `Google createClient: original=${options.model}, modelName=${modelName}`,
@@ -140,67 +140,47 @@ export const PROVIDER_CONFIGS = {
         streaming: true,
       });
     },
-    processModel: (model) => GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model,
-    shouldContinueOnNonString: true,
-    getTokenCountFallback: async (
-      input,
-      output,
-      historyTokens,
-      model,
-      settings,
-    ) => {
-      const { google_vertexai_key: apiKey } = settings;
-      const modelName = GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model;
-
-      const genAI = new GoogleGenerativeAI(apiKey);
-      const tokenCountingModel = genAI.getGenerativeModel({ model: modelName });
-
-      const { totalTokens: prompt_tokens } =
-        await tokenCountingModel.countTokens([
-          input,
-          // Use historyTokens instead of recalculating
-        ]);
-
-      const { totalTokens: completion_tokens } =
-        await tokenCountingModel.countTokens(output);
-
-      return {
-        prompt_tokens: prompt_tokens + historyTokens,
-        completion_tokens,
-      };
-    },
+    canonicalModel: (model) =>
+      GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model,
+    getTokenCountFallback: async (input, output, historyTokens) => ({
+      prompt_tokens: numTokens(input) + historyTokens,
+      completion_tokens: numTokens(output),
+    }),
   },
 
   anthropic: {
     name: "Anthropic",
-    createClient: async (options, settings) => {
-      const { anthropic_api_key: apiKey } = settings;
-      const modelVersion = ANTHROPIC_VERSION[options.model as AnthropicModel];
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.anthropic_api_key : options.apiKey;
+      const modelName =
+        mode === "cocalc"
+          ? ANTHROPIC_VERSION[options.model as AnthropicModel]
+          : options.model;
 
-      if (modelVersion == null) {
+      if (modelName == null) {
        throw new Error(
          `Anthropic model ${options.model} is no longer supported`,
        );
      }
 
       log.debug(
-        `Anthropic createClient: original=${options.model}, modelVersion=${modelVersion}`,
+        `Anthropic createClient: original=${options.model}, modelVersion=${modelName}`,
       );
 
       return new ChatAnthropic({
-        model: modelVersion,
-        apiKey: options.apiKey || apiKey,
+        model: modelName,
+        apiKey,
         maxTokens: options.maxTokens,
       });
     },
-    processModel: (model) => {
+    canonicalModel: (model) => {
       const version = ANTHROPIC_VERSION[model as AnthropicModel];
       if (version == null) {
         throw new Error(`Anthropic model ${model} is no longer supported`);
       }
       return version;
     },
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -209,17 +189,17 @@ export const PROVIDER_CONFIGS = {
 
   mistral: {
     name: "Mistral",
-    createClient: async (options, settings) => {
-      const { mistral_api_key: apiKey } = settings;
+    createClient: async (options, settings, mode) => {
+      const apiKey =
+        mode === "cocalc" ? settings.mistral_api_key : options.apiKey;
 
       log.debug(`Mistral createClient: model=${options.model}`);
 
       return new ChatMistralAI({
         model: options.model,
-        apiKey: options.apiKey || apiKey,
+        apiKey,
       });
     },
-    shouldContinueOnNonString: true,
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -235,8 +215,7 @@ export const PROVIDER_CONFIGS = {
       );
       return await getCustomOpenAI(transformedModel);
     },
-    processModel: (model) => fromCustomOpenAIModel(model),
-    shouldContinueOnNonString: false, // breaks on non-string content
+    canonicalModel: (model) => fromCustomOpenAIModel(model),
     getTokenCountFallback: async (input, output, historyTokens) => ({
       prompt_tokens: numTokens(input) + historyTokens,
       completion_tokens: numTokens(output),
@@ -269,20 +248,17 @@ function content2string(content: any): string {
     const output0 = content[0];
     if (output0?.type === "text") {
       return output0.text;
-    } else {
-      log.debug("content2string unable to process", content);
-      return "";
     }
-  } else {
-    log.debug("content2string unable to process", content);
-    return "";
   }
+
+  log.debug("content2string unable to process", content);
+  return "";
 }
 
 // Main unified evaluation function
 export async function evaluateWithLangChain(
   options: LLMEvaluationOptions,
-  _mode: "cocalc" | "user" = "cocalc",
+  mode: "cocalc" | "user" = "cocalc",
 ): Promise<ChatOutput> {
   const { input, system, history = [], model, stream, maxTokens } = options;
 
@@ -302,10 +278,12 @@ export async function evaluateWithLangChain(
   const settings = await getServerSettings();
 
   // Create LangChain client
-  const client = await config.createClient(options, settings);
+  const client = await config.createClient(options, settings, mode);
 
-  // Process model name if needed (processed model not currently used in this function)
-  // const _processedModel = config.processModel ? config.processModel(model) : model;
+  // Canonical model name
+  const canonicalModel = config.canonicalModel
+    ? config.canonicalModel(model)
+    : model;
 
   // Determine system role (always use "history" for historyKey)
   const systemRole = config.getSystemRole
@@ -316,8 +294,8 @@ export async function evaluateWithLangChain(
 
   // Create prompt template
   // For o1 models, omit the system message entirely since they don't support system roles
-  const isO1Model = model.includes("o1");
-  const prompt = isO1Model
+  const isO1 = isO1Model(canonicalModel);
+  const prompt = isO1
     ? ChatPromptTemplate.fromMessages([
         new MessagesPlaceholder(historyMessagesKey),
         ["human", system ? `${system}\n\n{input}` : "{input}"],
@@ -347,15 +325,10 @@ export async function evaluateWithLangChain(
     },
   });
 
-  // Handle streaming vs non-streaming
-  const supportsStreaming = config.supportsStreaming
-    ? config.supportsStreaming(model)
-    : true;
-
   let finalResult: AIMessageChunk | undefined;
   let output = "";
 
-  if (stream && supportsStreaming) {
+  if (stream) {
     // Streaming mode
     const chunks = await chainWithHistory.stream({ input });
 
@@ -364,10 +337,6 @@ export async function evaluateWithLangChain(
       const { content } = chunkTyped;
      const contentStr = content2string(content);
 
-      if (typeof content !== "string" && !config.shouldContinueOnNonString) {
-        break;
-      }
-
       if (typeof content === "string") {
         output += content;
         stream(content);
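
Taken together, the evaluate-lc.ts changes slim down LLMProviderConfig: the supportsStreaming and shouldContinueOnNonString flags are gone, processModel is renamed to canonicalModel, and createClient now receives the evaluation mode, where "cocalc" pulls the API key from server settings and "user" takes it from the caller's options. A rough sketch of the resulting provider shape follows; the "Example" provider, the example_api_key setting name, and SomeChatModel are placeholders for illustration, not part of the codebase.

// Hypothetical provider entry conforming to the slimmed-down interface
// (assumes the same imports as evaluate-lc.ts; SomeChatModel and
// example_api_key are placeholders).
const exampleProvider: LLMProviderConfig = {
  name: "Example",
  createClient: async (options, settings, mode) => {
    // mode decides where the API key comes from
    const apiKey =
      mode === "cocalc" ? (settings as any).example_api_key : options.apiKey;
    return new SomeChatModel({ model: options.model, apiKey });
  },
  // map the CoCalc-facing model name to the provider's canonical id
  canonicalModel: (model) => model,
  // crude fallback used when the provider reports no token usage
  getTokenCountFallback: async (input, output, historyTokens) => ({
    prompt_tokens: numTokens(input) + historyTokens,
    completion_tokens: numTokens(output),
  }),
};

With the per-provider special cases removed, evaluateWithLangChain(options, mode) simply picks the matching config, builds the client, resolves the canonical model name for the o1 check, and streams every chunk whenever a stream callback is supplied.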

src/packages/server/llm/test/models.test.ts

Lines changed: 6 additions & 5 deletions

@@ -29,7 +29,8 @@ import { evaluateOpenAILC } from "../openai-lc";
 import { evaluateUserDefinedLLM } from "../user-defined";
 import { enableModels, setupAPIKeys, test_llm } from "./shared";
 
-const LLM_TIMEOUT = 10_000;
+// sometimes (flaky case) they take more than 10s to even start a response
+const LLM_TIMEOUT = 15_000;
 
 beforeAll(async () => {
   await initEphemeralDatabase();
@@ -143,18 +144,18 @@ test_llm("openai")("OpenAI", () => {
     LLM_TIMEOUT,
   );
   test(
-    "openai 4.1 mini works",
+    "4.1 mini works",
     async () => {
-      llmOpenAI("gpt-4.1-mini");
+      await llmOpenAI("gpt-4.1-mini");
     },
     LLM_TIMEOUT,
   );
 
-  test("openai o1", async () => {
+  test("o1", async () => {
     await llmOpenAI("o1-8k");
   });
 
-  test("gpt o1 mini works", async () => {
+  test("o1 mini works", async () => {
     await llmOpenAI("o1-mini-8k");
   });
 });
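
Besides the shorter test titles and the timeout bump, the notable fix in this file is the await added in front of llmOpenAI("gpt-4.1-mini"): a promise that is not awaited inside the test body cannot fail the test, so a rejection would previously have gone unnoticed. A generic illustration of the pitfall (askModel is a stand-in helper, not the repository's):

// Illustration only: the async helper must be awaited so that a rejection fails the test.
declare function askModel(model: string): Promise<string>;

test(
  "model responds",
  async () => {
    // askModel("gpt-4.1-mini");                  // floating promise: a rejection is silently lost
    const reply = await askModel("gpt-4.1-mini"); // awaited: a rejection fails the test
    expect(reply.length).toBeGreaterThan(0);
  },
  15_000, // generous timeout, mirroring the raised LLM_TIMEOUT
);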
