Skip to content

Commit 491cc9c

Browse files
haraldschilly and claude
committed
llm/test: add user-defined LLM tests with all provider support
- Add comprehensive test suite for user-defined LLMs
- Test OpenAI, Google, Anthropic, Mistral, and custom OpenAI models
- Create ephemeral test database account with proper user-defined LLM config storage
- Use environment variables for API keys (COCALC_TEST_*_KEY)
- Tests validate end-to-end functionality from database storage to LLM evaluation
- Update Anthropic model to use claude-3-5-haiku-latest alias
- All 5 user-defined LLM tests passing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 02d7498 commit 491cc9c

File tree

2 files changed

+226
-2
lines changed

2 files changed

+226
-2
lines changed

src/packages/server/llm/evaluate-lc.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,9 @@ export async function evaluateWithLangChain(
339339
inputMessagesKey: "input",
340340
historyMessagesKey,
341341
getMessageHistory: async () => {
342-
const { messageHistory, tokens } =
343-
await transformHistoryToMessages(history);
342+
const { messageHistory, tokens } = await transformHistoryToMessages(
343+
history,
344+
);
344345
historyTokens = tokens;
345346
return messageHistory;
346347
},

src/packages/server/llm/test/models.test.ts

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,23 @@ import {
1010
isGoogleModel,
1111
isMistralModel,
1212
isOpenAIModel,
13+
UserDefinedLLM,
14+
toUserLLMModelName,
1315
} from "@cocalc/util/db-schema/llm-utils";
1416
import { evaluateGoogleGenAI } from "..";
1517
import { evaluateAnthropic } from "../anthropic";
1618
import { getClient } from "../client";
19+
import createAccount from "../../accounts/create-account";
20+
import { db } from "@cocalc/database";
21+
import { callback2 } from "@cocalc/util/async-utils";
22+
import { OTHER_SETTINGS_USERDEFINED_LLM } from "@cocalc/util/db-schema/defaults";
23+
import { uuid } from "@cocalc/util/misc";
1724
import { evaluateWithLangChain } from "../evaluate-lc";
1825
import { GoogleGenAIClient } from "../google-genai-client";
1926
import { USE_NEWER_LC_IMPL } from "../index";
2027
import { evaluateMistral } from "../mistral";
2128
import { evaluateOpenAILC } from "../openai-lc";
29+
import { evaluateUserDefinedLLM } from "../user-defined";
2230
import { enableModels, setupAPIKeys, test_llm } from "./shared";
2331

2432
const LLM_TIMEOUT = 10_000;
@@ -279,3 +287,218 @@ test_llm("anthropic")("Anthropic", () => {
279287
LLM_TIMEOUT,
280288
);
281289
});
290+
291+
// User-defined LLM tests
292+
describe("User-defined LLMs", () => {
293+
const account_id = uuid();
294+
let accountCreated = false;
295+
296+
beforeAll(async () => {
297+
// Create test account only once for the entire describe block
298+
if (!accountCreated) {
299+
await createAccount({
300+
email: `test-${account_id}@example.com`,
301+
password: "testpass123",
302+
firstName: "Test",
303+
lastName: "User",
304+
account_id,
305+
});
306+
accountCreated = true;
307+
}
308+
309+
// Enable user-defined LLMs server setting
310+
await callback2(db().set_server_setting, {
311+
name: "user_defined_llm",
312+
value: "yes",
313+
readonly: true,
314+
});
315+
});
316+
317+
async function createUserDefinedLLMConfig(configs: UserDefinedLLM[]) {
318+
const userDefinedLLMJson = JSON.stringify(configs);
319+
const pool = getPool();
320+
await pool.query(
321+
`UPDATE accounts SET other_settings = jsonb_set(
322+
COALESCE(other_settings, '{}'::jsonb),
323+
'{${OTHER_SETTINGS_USERDEFINED_LLM}}',
324+
to_jsonb($1::text)
325+
) WHERE account_id = $2`,
326+
[userDefinedLLMJson, account_id],
327+
);
328+
}
329+
330+
// Test user-defined OpenAI model
331+
test(
332+
"user-defined OpenAI model works",
333+
async () => {
334+
const openaiKey = process.env.COCALC_TEST_OPENAI_KEY;
335+
if (!openaiKey) {
336+
console.log("Skipping user-defined OpenAI test - no API key");
337+
return;
338+
}
339+
340+
const config: UserDefinedLLM = {
341+
id: 1,
342+
service: "openai",
343+
display: "Test GPT-4o Mini",
344+
endpoint: "https://api.openai.com/v1",
345+
model: "gpt-4o-mini",
346+
apiKey: openaiKey,
347+
};
348+
349+
await createUserDefinedLLMConfig([config]);
350+
351+
const userModel = toUserLLMModelName(config);
352+
const answer = await evaluateUserDefinedLLM(
353+
{
354+
model: userModel,
355+
...QUERY,
356+
},
357+
account_id,
358+
);
359+
360+
checkAnswer(answer);
361+
},
362+
LLM_TIMEOUT,
363+
);
364+
365+
// Test user-defined Google model
366+
test(
367+
"user-defined Google model works",
368+
async () => {
369+
const googleKey = process.env.COCALC_TEST_GOOGLE_GENAI_KEY;
370+
if (!googleKey) {
371+
console.log("Skipping user-defined Google test - no API key");
372+
return;
373+
}
374+
375+
const config: UserDefinedLLM = {
376+
id: 2,
377+
service: "google",
378+
display: "Test Gemini Flash",
379+
endpoint: "",
380+
model: "gemini-1.5-flash",
381+
apiKey: googleKey,
382+
};
383+
384+
await createUserDefinedLLMConfig([config]);
385+
386+
const userModel = toUserLLMModelName(config);
387+
const answer = await evaluateUserDefinedLLM(
388+
{
389+
model: userModel,
390+
...QUERY,
391+
},
392+
account_id,
393+
);
394+
395+
checkAnswer(answer);
396+
},
397+
LLM_TIMEOUT,
398+
);
399+
400+
// Test user-defined Anthropic model
401+
test(
402+
"user-defined Anthropic model works",
403+
async () => {
404+
const anthropicKey = process.env.COCALC_TEST_ANTHROPIC_KEY;
405+
if (!anthropicKey) {
406+
console.log("Skipping user-defined Anthropic test - no API key");
407+
return;
408+
}
409+
410+
const config: UserDefinedLLM = {
411+
id: 3,
412+
service: "anthropic",
413+
display: "claude-3-5-haiku-latest",
414+
endpoint: "",
415+
model: "claude-3-5-haiku-latest",
416+
apiKey: anthropicKey,
417+
};
418+
419+
await createUserDefinedLLMConfig([config]);
420+
421+
const userModel = toUserLLMModelName(config);
422+
const answer = await evaluateUserDefinedLLM(
423+
{
424+
model: userModel,
425+
...QUERY,
426+
},
427+
account_id,
428+
);
429+
430+
checkAnswer(answer);
431+
},
432+
LLM_TIMEOUT,
433+
);
434+
435+
// Test user-defined Mistral model
436+
test(
437+
"user-defined Mistral model works",
438+
async () => {
439+
const mistralKey = process.env.COCALC_TEST_MISTRAL_AI_KEY;
440+
if (!mistralKey) {
441+
console.log("Skipping user-defined Mistral test - no API key");
442+
return;
443+
}
444+
445+
const config: UserDefinedLLM = {
446+
id: 4,
447+
service: "mistralai",
448+
display: "Test Mistral Small",
449+
endpoint: "",
450+
model: "mistral-small-latest",
451+
apiKey: mistralKey,
452+
};
453+
454+
await createUserDefinedLLMConfig([config]);
455+
456+
const userModel = toUserLLMModelName(config);
457+
const answer = await evaluateUserDefinedLLM(
458+
{
459+
model: userModel,
460+
...QUERY,
461+
},
462+
account_id,
463+
);
464+
465+
checkAnswer(answer);
466+
},
467+
LLM_TIMEOUT,
468+
);
469+
470+
// Test user-defined custom OpenAI model
471+
test(
472+
"user-defined custom OpenAI model works",
473+
async () => {
474+
const openaiKey = process.env.COCALC_TEST_OPENAI_KEY;
475+
if (!openaiKey) {
476+
console.log("Skipping user-defined custom OpenAI test - no API key");
477+
return;
478+
}
479+
480+
const config: UserDefinedLLM = {
481+
id: 5,
482+
service: "custom_openai",
483+
display: "Test Custom GPT-4o",
484+
endpoint: "https://api.openai.com/v1",
485+
model: "gpt-4o",
486+
apiKey: openaiKey,
487+
};
488+
489+
await createUserDefinedLLMConfig([config]);
490+
491+
const userModel = toUserLLMModelName(config);
492+
const answer = await evaluateUserDefinedLLM(
493+
{
494+
model: userModel,
495+
...QUERY,
496+
},
497+
account_id,
498+
);
499+
500+
checkAnswer(answer);
501+
},
502+
LLM_TIMEOUT,
503+
);
504+
});

0 commit comments

Comments (0)