diff --git a/src/.claude/settings.local.json b/src/.claude/settings.local.json index d69e67b58f2..11bf37a4f99 100644 --- a/src/.claude/settings.local.json +++ b/src/.claude/settings.local.json @@ -4,7 +4,18 @@ "Bash(pnpm tsc:*)", "Bash(pnpm build:*)", "Bash(git add:*)", - "Bash(git commit:*)" + "Bash(git commit:*)", + "Bash(node:*)", + "Bash(grep:*)", + "Bash(find:*)", + "WebFetch(domain:github.com)", + "WebFetch(domain:cocalc.com)", + "WebFetch(domain:doc.cocalc.com)", + "Bash(npm show:*)", + "Bash(prettier -w:*)", + "Bash(npx tsc:*)", + "Bash(gh pr view:*)", + "Bash(gh:*)" ], "deny": [] } diff --git a/src/packages/frontend/admin/_style.sass b/src/packages/frontend/admin/_style.sass index 5af8b6f26a6..80e020a52c2 100644 --- a/src/packages/frontend/admin/_style.sass +++ b/src/packages/frontend/admin/_style.sass @@ -13,3 +13,6 @@ td:first-child font-family: monospace font-weight: bold + +.admin-llm-test-running-row + background-color: #f0f0f0 !important diff --git a/src/packages/frontend/admin/llm/admin-llm-test.tsx b/src/packages/frontend/admin/llm/admin-llm-test.tsx new file mode 100644 index 00000000000..66718e15418 --- /dev/null +++ b/src/packages/frontend/admin/llm/admin-llm-test.tsx @@ -0,0 +1,400 @@ +import { Alert, Button, Input, Select, Space, Table } from "antd"; + +import { + redux, + useAsyncEffect, + useState, + useTypedRedux, +} from "@cocalc/frontend/app-framework"; +import { Icon, Loading, Paragraph, Title } from "@cocalc/frontend/components"; +import { LLMModelName } from "@cocalc/frontend/components/llm-name"; +import { Markdown } from "@cocalc/frontend/markdown"; +import { webapp_client } from "@cocalc/frontend/webapp-client"; +import { + USER_SELECTABLE_LLMS_BY_VENDOR, + isCoreLanguageModel, + toCustomOpenAIModel, + toOllamaModel, +} from "@cocalc/util/db-schema/llm-utils"; +import { trunc_middle } from "@cocalc/util/misc"; +import { COLORS } from "@cocalc/util/theme"; +import { PROMPTS } from "./tests"; +import { Value } from "./value"; + +interface TestResult { + model: string; + status: "pending" | "running" | "passed" | "failed"; + output: string; + error?: string; +} + +export function TestLLMAdmin() { + const customize = redux.getStore("customize"); + const globallyEnabledLLMs = customize.getEnabledLLMs(); + const selectableLLMs = useTypedRedux("customize", "selectable_llms"); + const ollama = useTypedRedux("customize", "ollama"); + const custom_openai = useTypedRedux("customize", "custom_openai"); + const [test, setTest] = useState(0); + const [querying, setQuerying] = useState(false); + const [testResults, setTestResults] = useState([]); + const [currentTestIndex, setCurrentTestIndex] = useState(0); + + // Initialize test results on component mount or when test changes + useAsyncEffect(() => { + if (test !== null) { + const allModels = getAllModels(); + const initialResults: TestResult[] = allModels.map((model) => ({ + model, + status: "pending", + output: "", + })); + setTestResults(initialResults); + } else { + setTestResults([]); + } + }, [test, custom_openai, ollama, selectableLLMs]); + + function getAllModels(): string[] { + const models: string[] = []; + + // Get core models + Object.entries(USER_SELECTABLE_LLMS_BY_VENDOR).forEach(([vendor, llms]) => { + if (vendor !== "ollama" && vendor !== "custom_openai") { + llms.filter(isCoreLanguageModel).forEach((llm) => { + models.push(llm); + }); + } + }); + + // Get custom OpenAI models + Object.entries(custom_openai?.toJS() ?? 
{}).forEach(([key, _val]) => { + const model = toCustomOpenAIModel(key); + models.push(model); + }); + + // Get Ollama models + Object.entries(ollama?.toJS() ?? {}).forEach(([key, _val]) => { + const model = toOllamaModel(key); + models.push(model); + }); + + return models; + } + + function getEnabledModels(): string[] { + return getAllModels().filter((model) => { + // Check if model is enabled in selectable LLMs + if (isCoreLanguageModel(model)) { + return selectableLLMs.includes(model); + } + // Custom OpenAI and Ollama models are always considered enabled if configured + return true; + }); + } + + async function runTestForModel( + model: string, + testConfig: any, + ): Promise { + const { prompt, expected, system, history } = testConfig; + const expectedRegex = new RegExp(expected, "g"); + + return new Promise((resolve) => { + try { + const llmStream = webapp_client.openai_client.queryStream({ + input: prompt, + project_id: null, + tag: "admin-llm-test", + model, + system, + history, + maxTokens: 20, + }); + + let reply = ""; + + llmStream.on("token", (token) => { + console.log({ model, system, token }); + if (token != null) { + reply += token; + // Update the result in real-time + setTestResults((prev) => + prev.map((r) => + r.model === model ? { ...r, output: reply } : r, + ), + ); + } else { + // Stream is complete (token is null) + const passed = expectedRegex.test(reply); + resolve({ + model, + status: passed ? "passed" : "failed", + output: reply, + }); + } + }); + + llmStream.on("error", (err) => { + console.error(`Error in LLM stream for model ${model}:`, err); + resolve({ + model, + status: "failed", + output: reply, + error: err?.toString(), + }); + }); + + // Start the stream + llmStream.emit("start"); + } catch (err) { + console.error(`Error running test for model ${model}:`, err); + resolve({ + model, + status: "failed", + output: "", + error: err?.toString(), + }); + } + }); + } + + async function runSingleTest(model: string) { + if (test === null) return; + + const testConfig = PROMPTS[test]; + + // Find the model in the results and update its status + const modelIndex = testResults.findIndex((r) => r.model === model); + if (modelIndex === -1) return; + + setCurrentTestIndex(modelIndex); + + // Update status to running + setTestResults((prev) => + prev.map((r, idx) => + idx === modelIndex + ? { ...r, status: "running", output: "", error: undefined } + : r, + ), + ); + + const result = await runTestForModel(model, testConfig); + + // Update with final result + setTestResults((prev) => + prev.map((r, idx) => (idx === modelIndex ? result : r)), + ); + } + + async function runSequentialTests() { + if (test === null) return; + + const models = getEnabledModels(); + const testConfig = PROMPTS[test]; + + // Initialize results + const initialResults: TestResult[] = models.map((model) => ({ + model, + status: "pending", + output: "", + })); + + setTestResults(initialResults); + setQuerying(true); + setCurrentTestIndex(0); + + // Run tests sequentially + for (let i = 0; i < models.length; i++) { + setCurrentTestIndex(i); + + // Update status to running + setTestResults((prev) => + prev.map((r, idx) => (idx === i ? { ...r, status: "running" } : r)), + ); + + const result = await runTestForModel(models[i], testConfig); + + // Update with final result + setTestResults((prev) => prev.map((r, idx) => (idx === i ? 
result : r))); + + // Add delay between tests to avoid rate limiting + if (i < models.length - 1) { + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } + + setQuerying(false); + } + + function renderTestResultIcon(status: TestResult["status"]) { + switch (status) { + case "pending": + return ; + case "running": + return ; + case "passed": + return ; + case "failed": + return ; + default: + return ; + } + } + + function renderTestResults() { + if (testResults.length === 0) { + return ( + + Click "Run Tests" to execute the selected test on all enabled models. + + ); + } + + const columns = [ + { + title: "Status", + dataIndex: "status", + key: "status", + width: 80, + render: (status: TestResult["status"]) => renderTestResultIcon(status), + }, + { + title: "Model", + dataIndex: "model", + key: "model", + width: 180, + render: (model: string /*, record: TestResult*/) => ( + + + {/* {record.status === "running" && (Running...)} */} + + ), + }, + { + title: "Output", + dataIndex: "output", + key: "output", + render: (output: string) => + output ? ( + + ) : ( + - + ), + }, + { + title: "Error", + dataIndex: "error", + key: "error", + render: (error: string) => + error ? ( + + ) : ( + - + ), + }, + { + title: "Test", + key: "test", + width: 80, + render: (_, record: TestResult) => { + const isEnabled = getEnabledModels().includes(record.model); + const isRunning = record.status === "running"; + const isQuerying = querying && record.status === "running"; + + return ( + + ); + }, + }, + ]; + + const dataSource = testResults.map((result, index) => ({ + ...result, + key: result.model, + // Add row styling for currently running test + className: + index === currentTestIndex && querying ? "running-row" : undefined, + })); + + return ( +
+ Test Results + + index === currentTestIndex && querying + ? "admin-llm-test-running-row" + : "" + } + style={{ marginTop: "10px" }} + /> + + ); + } + + return ( +
+ + Globally enabled LLMs (Admin Settings): + . + + + + setTest(parseInt(e.target.value))} + placeholder="Enter a query..." + addonAfter={ + + } + /> + + + + + + {renderTestResults()} + + Ollama configuration + + Custom OpenAI API + +
+ ); +} diff --git a/src/packages/frontend/admin/llm/index.tsx b/src/packages/frontend/admin/llm/index.tsx deleted file mode 100644 index 94523e20440..00000000000 --- a/src/packages/frontend/admin/llm/index.tsx +++ /dev/null @@ -1,197 +0,0 @@ -import { Button, Col, Input, Row, Select, Space, Switch } from "antd"; - -import { - CSS, - redux, - useState, - useTypedRedux, -} from "@cocalc/frontend/app-framework"; -import { Paragraph, Title } from "@cocalc/frontend/components"; -import { LLMModelName } from "@cocalc/frontend/components/llm-name"; -import { - LLMServiceName, - LLM_PROVIDER, - LanguageModelCore, - USER_SELECTABLE_LLMS_BY_VENDOR, - isCoreLanguageModel, - toCustomOpenAIModel, - toOllamaModel, -} from "@cocalc/util/db-schema/llm-utils"; -import { getRandomColor, trunc_middle } from "@cocalc/util/misc"; -import { TestLLM } from "./test-component"; -import { PROMPTS } from "./tests"; -import { Value } from "./value"; - -export function TestLLMAdmin() { - const customize = redux.getStore("customize"); - const globallyEnabledLLMs = customize.getEnabledLLMs(); - const selectableLLMs = useTypedRedux("customize", "selectable_llms"); - const ollama = useTypedRedux("customize", "ollama"); - const custom_openai = useTypedRedux("customize", "custom_openai"); - const [test, setTest] = useState(0); - // TODO: this is used to trigger sending queries – makes no sense that all of them disable it. fix this. - const [querying, setQuerying] = useState(); - const [all, setAll] = useState(false); - - function llmStyle(llm: string): CSS { - return { - marginLeft: "5px", - marginBottom: "5px", - borderLeft: `5px solid ${getRandomColor(llm, { - min: 0, - max: 255, - diff: 100, - })}`, - }; - } - - function renderStatus(llm: LanguageModelCore, vendor: LLMServiceName) { - const enabled = all || selectableLLMs.includes(llm); - - return ( - - - - - - - - {enabled ? ( - - ) : undefined} - - - ); - } - - function renderCustomOpenAI() { - return ( - - Custom OpenAI - {Object.entries(custom_openai?.toJS() ?? {}).map(([key, _val]) => { - const model = toCustomOpenAIModel(key); - - return ( - - - - - - - - - - - ); - })} - - ); - } - - function renderOllama() { - return ( - - Ollama - {Object.entries(ollama?.toJS() ?? {}).map(([key, _val]) => { - const model = toOllamaModel(key); - - return ( - - - - - - - - - - - ); - })} - - ); - } - - return ( -
- - Globally enabled LLMs (Admin Settings): - . - - - - setTest(parseInt(e.target.value))} - placeholder="Enter a query..." - addonAfter={ - - } - /> - - - setAll(e)} /> All - - - - - {Object.entries(USER_SELECTABLE_LLMS_BY_VENDOR).map( - ([vendor, llms]) => - vendor !== "ollama" && vendor !== "custom_openai" ? ( -
- {LLM_PROVIDER[vendor].name} - {llms - .filter(isCoreLanguageModel) - .map((llm) => renderStatus(llm, vendor as LLMServiceName))} - - ) : undefined, - )} - {renderOllama()} - {renderCustomOpenAI()} - - - - Ollama configuration - - Custom OpenAI API - - - ); -} diff --git a/src/packages/frontend/admin/llm/test-component.tsx b/src/packages/frontend/admin/llm/test-component.tsx deleted file mode 100644 index cab5496bc40..00000000000 --- a/src/packages/frontend/admin/llm/test-component.tsx +++ /dev/null @@ -1,122 +0,0 @@ -import { Alert, Space } from "antd"; -import { throttle } from "lodash"; - -import { - useAsyncEffect, - useEffect, - useState, -} from "@cocalc/frontend/app-framework"; -import { Icon, Loading } from "@cocalc/frontend/components"; -import { Markdown } from "@cocalc/frontend/markdown"; -import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { LanguageModelCore } from "@cocalc/util/db-schema/llm-utils"; -import { PROMPTS } from "./tests"; -import { Value } from "./value"; - -interface TestLLMProps { - model: LanguageModelCore | string; - test: number | null; - queryState: [boolean | undefined, (val: boolean) => void]; -} - -export function TestLLM({ model, test, queryState }: TestLLMProps) { - const [querying, setQuerying] = queryState; - const [output, setOutput] = useState(""); - const [error, setError] = useState(""); - const [passed, setPassed] = useState(); - - const { - prompt, - expected, - system = undefined, - history = undefined, - } = typeof test === "number" ? PROMPTS[test] : { prompt: "", expected: "" }; - const expectedRegex = new RegExp(expected, "g"); - - const check = throttle( - () => { - if (passed != null && output.trim() === "") { - setPassed(undefined); - } else if (expectedRegex.test(output) && !passed) { - setPassed(true); - } - }, - 250, - { - leading: false, - trailing: true, - }, - ); - - useEffect(() => { - if (prompt.trim() === "") { - setOutput(""); - setError(""); - setPassed(undefined); - } - }, [prompt, test]); - - useEffect(() => { - check(); - }, [output]); - - useAsyncEffect(async () => { - if (!querying || prompt.trim() === "") { - querying && setQuerying(false); - setError(""); - return; - } - - try { - setPassed(undefined); - const llmStream = webapp_client.openai_client.queryStream({ - input: prompt, - project_id: null, - tag: "admin-llm-test", - model, - system, - history, - maxTokens: 20, - }); - - let reply = ""; - llmStream.on("token", (token) => { - if (token) { - reply += token; - setOutput(reply); - } - }); - - llmStream.on("error", (err) => { - setPassed(false); - setError(err?.toString()); - setQuerying(false); - }); - } catch (err) { - setError(err?.toString()); - } finally { - setQuerying(false); - } - }, [querying]); - - function renderPassed() { - if (typeof passed === "boolean") { - return ; - } else { - return ; - } - } - - if (querying) { - return ; - } - - return ( - <> - - {renderPassed()} - - {error ? 
: undefined} - - ); -} diff --git a/src/packages/frontend/admin/page.tsx b/src/packages/frontend/admin/page.tsx index fcdeac3cdd0..99bba3301bc 100644 --- a/src/packages/frontend/admin/page.tsx +++ b/src/packages/frontend/admin/page.tsx @@ -14,7 +14,7 @@ import { UsageStatistics } from "./stats/page"; import { SystemNotifications } from "./system-notifications"; import { UserSearch } from "./users/user-search"; import AIAvatar from "@cocalc/frontend/components/ai-avatar"; -import { TestLLMAdmin } from "./llm"; +import { TestLLMAdmin } from "./llm/admin-llm-test"; const headerStyle = { fontSize: "12pt" } as const; diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index 2e50f5f015b..4b135f3f477 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -330,7 +330,7 @@ export function AIGenerateCodeCell({ if (cancel.current) { // we abort this stream.removeAllListeners(); - // singal "finalization" + // single "finalization" updateCells(answer); return; } diff --git a/src/packages/frontend/misc/llm.ts b/src/packages/frontend/misc/llm.ts index 4f1d28ef01f..15e10d08287 100644 --- a/src/packages/frontend/misc/llm.ts +++ b/src/packages/frontend/misc/llm.ts @@ -12,7 +12,7 @@ export { getMaxTokens }; // about 5 characters long on average, and there is a space character between // each word. So, for every 6 characters, there is approximately one token." // Using this, our 250,000 character text gets truncated down to 6*4096 ~ 25,000 -// and then runnin the tokenizer is fast: it takes 62ms instead of nearly 6 seconds! +// and then running the tokenizer is fast: it takes 62ms instead of nearly 6 seconds! // if 6 is about right, 8 should be a good upper bound. 
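// A minimal sketch of the truncation heuristic described in the comment above; it is
// an illustration only and not part of this diff. The helper name `truncateForTokenizer`
// and the 4096-token budget are hypothetical, chosen just to show the arithmetic with
// the 8-characters-per-token upper bound used below:
//
//   function truncateForTokenizer(text: string, maxTokens = 4096): string {
//     // 8 chars/token is a generous upper bound, so we never cut below the real token budget
//     const maxChars = 8 * maxTokens; // 32,768 characters for a 4096-token budget
//     return text.length > maxChars ? text.slice(0, maxChars) : text;
//   }
//
//   // e.g. a 250,000-character input is first cut to at most 32,768 characters,
//   // and only the truncated text is handed to the (slow) tokenizer.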
const APPROX_CHARACTERS_PER_TOKEN = 8; diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index 1fe4499fe01..5a163627777 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -1205,23 +1205,23 @@ importers: specifier: ^1.4.1 version: 1.4.1 '@langchain/anthropic': - specifier: ^0.3.18 - version: 0.3.24(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) + specifier: ^0.3.24 + version: 0.3.24(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) '@langchain/core': specifier: ^0.3.46 - version: 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + version: 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) '@langchain/google-genai': - specifier: ^0.2.4 - version: 0.2.14(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) + specifier: ^0.2.15 + version: 0.2.15(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) '@langchain/mistralai': - specifier: ^0.2.0 - version: 0.2.1(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(zod@3.25.76) + specifier: ^0.2.1 + version: 0.2.1(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(zod@3.25.76) '@langchain/ollama': - specifier: ^0.2.0 - version: 0.2.3(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) + specifier: ^0.2.3 + version: 0.2.3(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))) '@langchain/openai': - specifier: ^0.5.5 - version: 0.5.18(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(ws@8.18.3) + specifier: ^0.6.1 + version: 0.6.1(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(ws@8.18.3) '@node-saml/passport-saml': specifier: ^5.0.1 version: 5.0.1 @@ -1331,7 +1331,7 @@ importers: specifier: ^6.9.16 version: 6.10.1 openai: - specifier: ^4.95.1 + specifier: ^4.104.0 version: 4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76) parse-domain: specifier: ^5.0.0 @@ -3023,12 +3023,12 @@ packages: peerDependencies: '@langchain/core': ^0.3.46 - '@langchain/core@0.3.62': - resolution: {integrity: sha512-GqRTcoUPnozGRMUcA6QkP7LHL/OvanGdB51Jgb0w7IIPDI3wFugxMHZ4gphnGDtxsD1tQY5ykyEpYNxFK8kl1w==} + '@langchain/core@0.3.64': + resolution: {integrity: sha512-KOHTnmycOPfuffLAm3wwv1rThQ47iG5a3HuWMt2qYhwwImFi6HLeYqKgmxVS5qcJjc6t0IPwR7jOvv9IKxfrAw==} engines: {node: '>=18'} - '@langchain/google-genai@0.2.14': - resolution: {integrity: sha512-gKe/T2LNh8wSSMJOaFmYd8cwQnDSXKtVtC6a7CFoq5nWuh0bKzhItM/7bue1aMN8mlKfB2G1HCwxhaZoSpS/DA==} + '@langchain/google-genai@0.2.15': + resolution: {integrity: sha512-fAD3xjzd5TxWQCKlttNeEc+b5tUX43hBqKH3rk3g+wbl1ToLqe3ocWawKRmGotEuI5jhDVmoHjDxoNMifFDgmg==} engines: {node: '>=18'} peerDependencies: '@langchain/core': ^0.3.46 @@ -3045,8 +3045,8 @@ packages: peerDependencies: '@langchain/core': ^0.3.46 - '@langchain/openai@0.5.18': - resolution: {integrity: sha512-CX1kOTbT5xVFNdtLjnM0GIYNf+P7oMSu+dGCFxxWRa3dZwWiuyuBXCm+dToUGxDLnsHuV1bKBtIzrY1mLq/A1Q==} + '@langchain/openai@0.6.1': + resolution: {integrity: 
sha512-jm8MzMEjAKPReYma4Lewb9vGnocKbhoClqPuRTxtKPDgqQ5yJWSisNy4iZO/a1d6ag/7MnxwKMjVsJdy1cBsxw==} engines: {node: '>=18'} peerDependencies: '@langchain/core': ^0.3.46 @@ -4656,8 +4656,8 @@ packages: access-control@1.0.1: resolution: {integrity: sha512-H5aqjkogmFxfaOrfn/e42vyspHVXuJ8er63KuljJXpOyJ1ZO/U5CrHfO8BLKIy2w7mBM02L5quL0vbfQqrGQbA==} - acorn-import-phases@1.0.3: - resolution: {integrity: sha512-jtKLnfoOzm28PazuQ4dVBcE9Jeo6ha1GAJvq3N0LlNOszmTfx+wSycBehn+FN0RnyeR77IBxN/qVYMw0Rlj0Xw==} + acorn-import-phases@1.0.4: + resolution: {integrity: sha512-wKmbr/DDiIXzEOiWrTTUcDm24kQ2vGfZQvM2fwg2vXqR5uW6aapr7ObPtj1th32b9u90/Pf4AItvdTh42fBmVQ==} engines: {node: '>=10.13.0'} peerDependencies: acorn: ^8.14.0 @@ -8194,8 +8194,8 @@ packages: langs@2.0.0: resolution: {integrity: sha512-v4pxOBEQVN1WBTfB1crhTtxzNLZU9HPWgadlwzWKISJtt6Ku/CnpBrwVy+jFv8StjxsPfwPFzO0CMwdZLJ0/BA==} - langsmith@0.3.44: - resolution: {integrity: sha512-LMCZ7ULSzIpDmsrxGZKzCpp8exuempvCFCX1N0m+u517ZhikPDEtAtgnREObMjIISzB7eXkODkFq0Klxc9FODg==} + langsmith@0.3.46: + resolution: {integrity: sha512-Hhi4/cMjhWIGpu0DW5eQrXBbeeKQWPYYQyJCYzhFjod+xinMry4i8QR0gxrrgjGOgfMuU6nyK79YqjGTEPVbDA==} peerDependencies: '@opentelemetry/api': '*' '@opentelemetry/exporter-trace-otlp-proto': '*' @@ -8773,6 +8773,7 @@ packages: node-domexception@1.0.0: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead node-fetch@2.7.0: resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} @@ -8944,8 +8945,8 @@ packages: zod: optional: true - openai@5.9.0: - resolution: {integrity: sha512-cmLC0pfqLLhBGxE4aZPyRPjydgYCncppV2ClQkKmW79hNjCvmzkfhz8rN5/YVDmjVQlFV+UsF1JIuNjNgeagyQ==} + openai@5.10.1: + resolution: {integrity: sha512-fq6xVfv1/gpLbsj8fArEt3b6B9jBxdhAK+VJ+bDvbUvNd+KTLlA3bnDeYZaBsGH9LUhJ1M1yXfp9sEyBLMx6eA==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -13095,20 +13096,20 @@ snapshots: '@lumino/properties': 2.0.3 '@lumino/signaling': 2.1.4 - '@langchain/anthropic@0.3.24(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': + '@langchain/anthropic@0.3.24(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': dependencies: '@anthropic-ai/sdk': 0.56.0 - '@langchain/core': 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + '@langchain/core': 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) fast-xml-parser: 4.5.3 - '@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))': + '@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76))': dependencies: '@cfworker/json-schema': 4.1.1 ansi-styles: 5.2.0 camelcase: 6.3.0 decamelize: 1.2.0 js-tiktoken: 1.0.20 - langsmith: 0.3.44(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + langsmith: 0.3.46(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) mustache: 4.2.0 p-queue: 6.6.2 p-retry: 4.6.2 @@ -13121,31 +13122,31 @@ snapshots: - '@opentelemetry/sdk-trace-base' - openai - '@langchain/google-genai@0.2.14(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': + 
'@langchain/google-genai@0.2.15(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': dependencies: '@google/generative-ai': 0.24.1 - '@langchain/core': 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + '@langchain/core': 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) uuid: 11.1.0 - '@langchain/mistralai@0.2.1(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(zod@3.25.76)': + '@langchain/mistralai@0.2.1(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(zod@3.25.76)': dependencies: - '@langchain/core': 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + '@langchain/core': 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) '@mistralai/mistralai': 1.7.4(zod@3.25.76) uuid: 10.0.0 transitivePeerDependencies: - zod - '@langchain/ollama@0.2.3(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': + '@langchain/ollama@0.2.3(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))': dependencies: - '@langchain/core': 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + '@langchain/core': 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) ollama: 0.5.16 uuid: 10.0.0 - '@langchain/openai@0.5.18(@langchain/core@0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(ws@8.18.3)': + '@langchain/openai@0.6.1(@langchain/core@0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)))(ws@8.18.3)': dependencies: - '@langchain/core': 0.3.62(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) + '@langchain/core': 0.3.64(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)) js-tiktoken: 1.0.20 - openai: 5.9.0(ws@8.18.3)(zod@3.25.76) + openai: 5.10.1(ws@8.18.3)(zod@3.25.76) zod: 3.25.76 transitivePeerDependencies: - ws @@ -14997,7 +14998,7 @@ snapshots: setheader: 1.0.2 vary: 1.1.2 - acorn-import-phases@1.0.3(acorn@8.15.0): + acorn-import-phases@1.0.4(acorn@8.15.0): dependencies: acorn: 8.15.0 @@ -15321,7 +15322,7 @@ snapshots: axios@1.10.0: dependencies: - follow-redirects: 1.15.9 + follow-redirects: 1.15.9(debug@4.4.1) form-data: 4.0.3 proxy-from-env: 1.1.0 transitivePeerDependencies: @@ -17457,8 +17458,6 @@ snapshots: dependencies: dtype: 2.0.0 - follow-redirects@1.15.9: {} - follow-redirects@1.15.9(debug@4.4.1): optionalDependencies: debug: 4.4.1 @@ -19317,7 +19316,7 @@ snapshots: langs@2.0.0: {} - langsmith@0.3.44(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)): + langsmith@0.3.46(@opentelemetry/api@1.9.0)(openai@4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)): dependencies: '@types/uuid': 10.0.0 chalk: 4.1.2 @@ -20156,7 +20155,7 @@ snapshots: transitivePeerDependencies: - encoding - openai@5.9.0(ws@8.18.3)(zod@3.25.76): + openai@5.10.1(ws@8.18.3)(zod@3.25.76): optionalDependencies: ws: 8.18.3 zod: 3.25.76 @@ -23123,7 +23122,7 @@ snapshots: '@webassemblyjs/wasm-edit': 1.14.1 '@webassemblyjs/wasm-parser': 1.14.1 acorn: 8.15.0 - acorn-import-phases: 1.0.3(acorn@8.15.0) + acorn-import-phases: 1.0.4(acorn@8.15.0) browserslist: 4.25.1 
chrome-trace-event: 1.0.4 enhanced-resolve: 5.18.2 @@ -23155,7 +23154,7 @@ snapshots: '@webassemblyjs/wasm-edit': 1.14.1 '@webassemblyjs/wasm-parser': 1.14.1 acorn: 8.15.0 - acorn-import-phases: 1.0.3(acorn@8.15.0) + acorn-import-phases: 1.0.4(acorn@8.15.0) browserslist: 4.25.1 chrome-trace-event: 1.0.4 enhanced-resolve: 5.18.2 diff --git a/src/packages/server/llm/anthropic.ts b/src/packages/server/llm/anthropic.ts index 52078757443..b8902431eb7 100644 --- a/src/packages/server/llm/anthropic.ts +++ b/src/packages/server/llm/anthropic.ts @@ -1,9 +1,11 @@ import { ChatAnthropic } from "@langchain/anthropic"; +import { AIMessageChunk } from "@langchain/core/messages"; import { ChatPromptTemplate, MessagesPlaceholder, } from "@langchain/core/prompts"; import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings"; @@ -19,18 +21,11 @@ import { numTokens } from "./chatgpt-numtokens"; const log = getLogger("llm:anthropic"); function getModelName(model: AnthropicModel): string { - // The -4k and -8k variants have a limited context window (by us here) while offered for free - if (model === "claude-3-sonnet-4k") { - model = "claude-3-sonnet"; - } else if (model === "claude-3-haiku-8k") { - model = "claude-3-haiku"; - } else if (model === "claude-3-opus-8k") { - model = "claude-3-opus"; - } else if (model === "claude-3-5-sonnet-4k") { - model = "claude-3-5-sonnet"; + const id = ANTHROPIC_VERSION[model]; + if (id == null) { + throw new Error(`Anthropic model ${model} is no longer supported`); } - // now we have a valid name, and we have to append their static version number - return `${model}-${ANTHROPIC_VERSION[model]}`; + return id; } interface AnthropicOpts { @@ -105,9 +100,8 @@ export async function evaluateAnthropic( inputMessagesKey: "input", historyMessagesKey: "history", getMessageHistory: async () => { - const { messageHistory, tokens } = await transformHistoryToMessages( - history, - ); + const { messageHistory, tokens } = + await transformHistoryToMessages(history); historyTokens = tokens; return messageHistory; }, @@ -115,24 +109,57 @@ export async function evaluateAnthropic( const chunks = await chainWithHistory.stream({ input }); + let finalResult: AIMessageChunk | undefined; let output = ""; for await (const chunk of chunks) { const { content } = chunk; if (typeof content !== "string") continue; output += content; opts.stream?.(content); + + // Collect the final result to check for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunk); + } else { + finalResult = chunk; + } } opts.stream?.(null); - // we use that GPT3 tokenizer to get an approximate number of tokens - const prompt_tokens = numTokens(input) + historyTokens; - const completion_tokens = numTokens(output); - - return { - output, - total_tokens: prompt_tokens + completion_tokens, - completion_tokens, - prompt_tokens, - }; + // Check for usage metadata from LangChain first (more accurate) + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug("evaluateAnthropic successful (using usage_metadata)", { + input_tokens, + output_tokens, + total_tokens, + }); + + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // 
Fallback to manual token counting (approximation using GPT-3 tokenizer) + const prompt_tokens = numTokens(input) + historyTokens; + const completion_tokens = numTokens(output); + + log.debug("evaluateAnthropic successful (using manual counting)", { + prompt_tokens, + completion_tokens, + }); + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; + } } diff --git a/src/packages/server/llm/call-llm.ts b/src/packages/server/llm/call-llm.ts index 5eeebcfc601..7ea0df60c8a 100644 --- a/src/packages/server/llm/call-llm.ts +++ b/src/packages/server/llm/call-llm.ts @@ -2,11 +2,13 @@ import { delay } from "awaiting"; import type OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; import { OpenAIMessages, OpenAIModel } from "@cocalc/util/db-schema/llm-utils"; -import type { ChatOutput, Stream as StreamFunction } from "@cocalc/util/types/llm"; +import type { + ChatOutput, + Stream as StreamFunction, +} from "@cocalc/util/types/llm"; import { totalNumTokens } from "./chatgpt-numtokens"; import type { Stream } from "openai/streaming"; - const log = getLogger("llm:call-llm"); interface CallChatGPTOpts { diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index d792ff447cd..c738dd3e2bf 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -151,8 +151,8 @@ export async function getCustomOpenAI(model: string) { ); } - const settings = await getServerSettings(); - const config = settings.custom_openai_configuration?.[model]; + const { custom_openai_configuration } = await getServerSettings(); + const config = custom_openai_configuration?.[model]; if (!config) { throw new Error( `Custom OpenAI model ${model} not configured – you have to create an entry {${model}: {baseUrl: "https://...", ...}} in the "Custom OpenAI Configuration" entry of the server settings!`, @@ -173,12 +173,22 @@ export async function getCustomOpenAI(model: string) { // extract all other properties from the config, except the url, model, keepAlive field and the "cocalc" field const other = omit(config, ["baseUrl", "model", "keepAlive", "cocalc"]); - const customOpenAIConfig = { + + // Handle legacy API key field names for backward compatibility + const customOpenAIConfig: any = { configuration: { baseURL }, // https://js.langchain.com/docs/integrations/chat/openai/#custom-urls model: config.model ?? 
model, ...other, }; + // Convert legacy API key field names to the expected "apiKey" field + if (config.openAIApiKey && !customOpenAIConfig.apiKey) { + customOpenAIConfig.apiKey = config.openAIApiKey; + } + if (config.azureOpenAIApiKey && !customOpenAIConfig.apiKey) { + customOpenAIConfig.apiKey = config.azureOpenAIApiKey; + } + log.debug( "Instantiating Custom OpenAI client with config (omitting api keys)", omit(customOpenAIConfig, ["apiKey", "openAIApiKey", "azureOpenAIApiKey"]), diff --git a/src/packages/server/llm/custom-openai.ts b/src/packages/server/llm/custom-openai.ts index a6dd5c7b627..cb925c392e7 100644 --- a/src/packages/server/llm/custom-openai.ts +++ b/src/packages/server/llm/custom-openai.ts @@ -4,11 +4,13 @@ import { isCustomOpenAI, } from "@cocalc/util/db-schema/llm-utils"; import type { ChatOutput, History, Stream } from "@cocalc/util/types/llm"; +import { AIMessageChunk } from "@langchain/core/messages"; import { ChatPromptTemplate, MessagesPlaceholder, } from "@langchain/core/prompts"; import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; import { ChatOpenAI as ChatOpenAILC, OpenAICallOptions, @@ -51,7 +53,7 @@ export async function evaluateCustomOpenAI( const prompt = ChatPromptTemplate.fromMessages([ ["system", system ?? ""], - new MessagesPlaceholder("chat_history"), + new MessagesPlaceholder("history"), ["human", "{input}"], ]); @@ -63,11 +65,10 @@ export async function evaluateCustomOpenAI( runnable: chain, config: { configurable: { sessionId: "ignored" } }, inputMessagesKey: "input", - historyMessagesKey: "chat_history", + historyMessagesKey: "history", getMessageHistory: async () => { - const { messageHistory, tokens } = await transformHistoryToMessages( - history, - ); + const { messageHistory, tokens } = + await transformHistoryToMessages(history); historyTokens = tokens; return messageHistory; }, @@ -75,6 +76,7 @@ export async function evaluateCustomOpenAI( const chunks = await chainWithHistory.stream({ input }); + let finalResult: AIMessageChunk | undefined; let output = ""; for await (const chunk of chunks) { const { content } = chunk; @@ -83,19 +85,51 @@ export async function evaluateCustomOpenAI( } output += content; opts.stream?.(content); + + // Collect the final result to check for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunk); + } else { + finalResult = chunk; + } } // and an empty call when done opts.stream?.(null); - // we use that GPT3 tokenizer to get an approximate number of tokens - const prompt_tokens = numTokens(input) + historyTokens; - const completion_tokens = numTokens(output); + // Check for usage metadata from LangChain first (more accurate) + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug("evaluateCustomOpenAI successful (using usage_metadata)", { + input_tokens, + output_tokens, + total_tokens, + }); - return { - output, - total_tokens: prompt_tokens + completion_tokens, - completion_tokens, - prompt_tokens, - }; + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // Fallback to manual token counting (approximation using GPT-3 tokenizer) + const prompt_tokens = numTokens(input) + historyTokens; + const completion_tokens = numTokens(output); + + log.debug("evaluateCustomOpenAI successful (using manual 
counting)", { + prompt_tokens, + completion_tokens, + }); + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; + } } diff --git a/src/packages/server/llm/evaluate-lc.ts b/src/packages/server/llm/evaluate-lc.ts new file mode 100644 index 00000000000..8f986205cfb --- /dev/null +++ b/src/packages/server/llm/evaluate-lc.ts @@ -0,0 +1,406 @@ +/** + * Unified LangChain evaluation implementation + * + * This file provides a unified interface for all LangChain-based LLM providers, + * eliminating code duplication while preserving all provider-specific functionality. + */ + +import getLogger from "@cocalc/backend/logger"; +import { getServerSettings } from "@cocalc/database/settings"; +import { ServerSettings } from "@cocalc/database/settings/server-settings"; +import { + ANTHROPIC_VERSION, + AnthropicModel, + fromCustomOpenAIModel, + GOOGLE_MODEL_TO_ID, + GoogleModel, + isAnthropicModel, + isCustomOpenAI, + isGoogleModel, + isMistralModel, + isOpenAIModel, +} from "@cocalc/util/db-schema/llm-utils"; +import type { ChatOutput, History, Stream } from "@cocalc/util/types/llm"; +import { ChatAnthropic } from "@langchain/anthropic"; +import { AIMessageChunk } from "@langchain/core/messages"; +import { + ChatPromptTemplate, + MessagesPlaceholder, +} from "@langchain/core/prompts"; +import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; +import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; +import { ChatMistralAI } from "@langchain/mistralai"; +import { ChatOpenAI } from "@langchain/openai"; +import { transformHistoryToMessages } from "./chat-history"; +import { numTokens } from "./chatgpt-numtokens"; +import { getCustomOpenAI } from "./client"; +import { normalizeOpenAIModel } from "./index"; + +const log = getLogger("llm:evaluate-lc"); + +// Common interface for all LLM evaluation options +export interface LLMEvaluationOptions { + input: string; + system?: string; + history?: History; + model: string; + stream?: Stream; + maxTokens?: number; + apiKey?: string; +} + +// Provider-specific client configuration +export interface LLMProviderConfig { + // Provider identification + name: string; + + // Client creation function + createClient: ( + options: LLMEvaluationOptions, + settings: ServerSettings, + mode: "cocalc" | "user", + ) => Promise; + + // Model processing + canonicalModel?: (model: string) => string; + + // Special handling flags + getSystemRole?: (model: string) => string; + + // Token counting fallback + getTokenCountFallback?: ( + input: string, + output: string, + historyTokens: number, + model: string, + settings: any, + ) => Promise<{ prompt_tokens: number; completion_tokens: number }>; +} + +function isO1Model(normalizedModel) { + return normalizedModel === "o1" || normalizedModel === "o1-mini"; +} + +// Provider configurations +export const PROVIDER_CONFIGS = { + openai: { + name: "OpenAI", + createClient: async (options, settings) => { + const { openai_api_key: apiKey } = settings; + const normalizedModel = normalizeOpenAIModel(options.model); + + log.debug( + `OpenAI createClient: original=${options.model}, normalized=${normalizedModel}`, + ); + + // Check if it's O1 model (doesn't support streaming) + const isO1 = isO1Model(normalizedModel); + return new ChatOpenAI({ + model: normalizedModel, + apiKey: options.apiKey || apiKey, + maxTokens: options.maxTokens, + streaming: options.stream != null && !isO1, + streamUsage: true, + 
...(options.stream != null && !isO1 + ? { streamOptions: { includeUsage: true } } + : {}), + }); + }, + canonicalModel: (model) => normalizeOpenAIModel(model), + getSystemRole: (_model) => "system", + getTokenCountFallback: async (input, output, historyTokens) => ({ + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }), + }, + + google: { + name: "Google GenAI", + createClient: async (options, settings, mode) => { + const apiKey = + mode === "cocalc" ? settings.google_vertexai_key : options.apiKey; + const modelName = + mode === "cocalc" + ? GOOGLE_MODEL_TO_ID[options.model as GoogleModel] ?? options.model + : options.model; + + log.debug( + `Google createClient: original=${options.model}, modelName=${modelName}`, + ); + + return new ChatGoogleGenerativeAI({ + model: modelName, + apiKey: options.apiKey || apiKey, + maxOutputTokens: options.maxTokens, + // Only enable thinking tokens for Gemini 2.5 models + ...(modelName === "gemini-2.5-flash" || modelName === "gemini-2.5-pro" + ? { maxReasoningTokens: 1024 } + : {}), + streaming: true, + }); + }, + canonicalModel: (model) => + GOOGLE_MODEL_TO_ID[model as GoogleModel] ?? model, + getTokenCountFallback: async (input, output, historyTokens) => ({ + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }), + }, + + anthropic: { + name: "Anthropic", + createClient: async (options, settings, mode) => { + const apiKey = + mode === "cocalc" ? settings.anthropic_api_key : options.apiKey; + const modelName = + mode === "cocalc" + ? ANTHROPIC_VERSION[options.model as AnthropicModel] + : options.model; + + if (modelName == null) { + throw new Error( + `Anthropic model ${options.model} is no longer supported`, + ); + } + + log.debug( + `Anthropic createClient: original=${options.model}, modelVersion=${modelName}`, + ); + + return new ChatAnthropic({ + model: modelName, + apiKey, + maxTokens: options.maxTokens, + }); + }, + canonicalModel: (model) => { + const version = ANTHROPIC_VERSION[model as AnthropicModel]; + if (version == null) { + throw new Error(`Anthropic model ${model} is no longer supported`); + } + return version; + }, + getTokenCountFallback: async (input, output, historyTokens) => ({ + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }), + }, + + mistral: { + name: "Mistral", + createClient: async (options, settings, mode) => { + const apiKey = + mode === "cocalc" ? 
settings.mistral_api_key : options.apiKey; + + log.debug(`Mistral createClient: model=${options.model}`); + + return new ChatMistralAI({ + model: options.model, + apiKey, + }); + }, + getTokenCountFallback: async (input, output, historyTokens) => ({ + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }), + }, + + "custom-openai": { + name: "Custom OpenAI", + createClient: async (options, _settings) => { + const transformedModel = fromCustomOpenAIModel(options.model); + log.debug( + `Custom OpenAI createClient: original=${options.model}, transformed=${transformedModel}`, + ); + return await getCustomOpenAI(transformedModel); + }, + canonicalModel: (model) => fromCustomOpenAIModel(model), + getTokenCountFallback: async (input, output, historyTokens) => ({ + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }), + }, +} as const satisfies Record; + +// Get provider config based on model +export function getProviderConfig(model: string): LLMProviderConfig { + if (isOpenAIModel(model)) { + return PROVIDER_CONFIGS.openai; + } else if (isGoogleModel(model)) { + return PROVIDER_CONFIGS.google; + } else if (isAnthropicModel(model)) { + return PROVIDER_CONFIGS.anthropic; + } else if (isMistralModel(model)) { + return PROVIDER_CONFIGS.mistral; + } else if (isCustomOpenAI(model)) { + return PROVIDER_CONFIGS["custom-openai"]; + } else { + throw new Error(`Unknown model provider for: ${model}`); + } +} + +// Content processing helper +function content2string(content: any): string { + if (typeof content === "string") { + return content; + } else if (Array.isArray(content)) { + const output0 = content[0]; + if (output0?.type === "text") { + return output0.text; + } + } + + log.debug("content2string unable to process", content); + return ""; +} + +// Main unified evaluation function +export async function evaluateWithLangChain( + options: LLMEvaluationOptions, + mode: "cocalc" | "user" = "cocalc", +): Promise { + const { input, system, history = [], model, stream, maxTokens } = options; + + log.debug("evaluateWithLangChain", { + input, + history, + system, + model, + stream: stream != null, + maxTokens, + }); + + // Get provider configuration + const config = getProviderConfig(model); + + // Get server settings + const settings = await getServerSettings(); + + // Create LangChain client + const client = await config.createClient(options, settings, mode); + + // Canonical model name + const canonicalModel = config.canonicalModel + ? config.canonicalModel(model) + : model; + + // Determine system role (always use "history" for historyKey) + const systemRole = config.getSystemRole + ? config.getSystemRole(model) + : "system"; + + const historyMessagesKey = "history"; + + // Create prompt template + // For o1 models, omit the system message entirely since they don't support system roles + const isO1 = isO1Model(canonicalModel); + const prompt = isO1 + ? ChatPromptTemplate.fromMessages([ + new MessagesPlaceholder(historyMessagesKey), + ["human", system ? `${system}\n\n{input}` : "{input}"], + ]) + : ChatPromptTemplate.fromMessages([ + [systemRole, system ?? 
""], + new MessagesPlaceholder(historyMessagesKey), + ["human", "{input}"], + ]); + + const chain = prompt.pipe(client); + + let historyTokens = 0; + + // Set up chain with history + const chainWithHistory = new RunnableWithMessageHistory({ + runnable: chain, + config: { configurable: { sessionId: "ignored" } }, + inputMessagesKey: "input", + historyMessagesKey, + getMessageHistory: async () => { + const { messageHistory, tokens } = await transformHistoryToMessages( + history, + ); + historyTokens = tokens; + return messageHistory; + }, + }); + + let finalResult: AIMessageChunk | undefined; + let output = ""; + + if (stream) { + // Streaming mode + const chunks = await chainWithHistory.stream({ input }); + + for await (const chunk of chunks) { + const chunkTyped = chunk as AIMessageChunk; + const { content } = chunkTyped; + const contentStr = content2string(content); + + if (typeof content === "string") { + output += content; + stream(content); + } else if (contentStr) { + output += contentStr; + stream(contentStr); + } + + // Collect final result for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunkTyped); + } else { + finalResult = chunkTyped; + } + } + } else { + // Non-streaming mode + finalResult = (await chainWithHistory.invoke({ input })) as AIMessageChunk; + const { content } = finalResult; + output = content2string(content); + } + + stream?.(null); + + // Token counting - prefer usage_metadata, fallback to provider-specific method + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug(`${config.name} successful (using usage_metadata)`, { + input_tokens, + output_tokens, + total_tokens, + }); + + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // Fallback to provider-specific token counting + const tokenCount = config.getTokenCountFallback + ? 
await config.getTokenCountFallback( + input, + output, + historyTokens, + model, + settings, + ) + : { + prompt_tokens: numTokens(input) + historyTokens, + completion_tokens: numTokens(output), + }; + + log.debug(`${config.name} successful (using manual counting)`, tokenCount); + + return { + output, + total_tokens: tokenCount.prompt_tokens + tokenCount.completion_tokens, + completion_tokens: tokenCount.completion_tokens, + prompt_tokens: tokenCount.prompt_tokens, + }; + } +} diff --git a/src/packages/server/llm/google-genai-client.ts b/src/packages/server/llm/google-genai-client.ts index a83172f97d1..676de4352af 100644 --- a/src/packages/server/llm/google-genai-client.ts +++ b/src/packages/server/llm/google-genai-client.ts @@ -5,11 +5,13 @@ */ import { GenerativeModel, GoogleGenerativeAI } from "@google/generative-ai"; +import { AIMessageChunk } from "@langchain/core/messages"; import { ChatPromptTemplate, MessagesPlaceholder, } from "@langchain/core/prompts"; import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; import { ChatGoogleGenerativeAI } from "@langchain/google-genai"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings"; @@ -110,13 +112,14 @@ export class GoogleGenAIClient { model: modelName, apiKey: this.apiKey, maxOutputTokens: maxTokens, + // Only enable thinking tokens for Gemini 2.5 models + ...(modelName === "gemini-2.5-flash" || modelName === "gemini-2.5-pro" + ? { maxReasoningTokens: 1024 } + : {}), streaming: true, }); - // However, we also count tokens, and for that we use "gemini-1.5-pro" only - const geminiPro: GenerativeModel = this.genAI.getGenerativeModel({ - model: "gemini-1.5-pro", - }); + // Token counting will be done using either usage_metadata or the actual model const prompt = ChatPromptTemplate.fromMessages([ ["system", system ?? ""], @@ -139,32 +142,74 @@ export class GoogleGenAIClient { const chunks = await chainWithHistory.stream({ input }); + let finalResult: AIMessageChunk | undefined; let output = ""; for await (const chunk of chunks) { const { content } = chunk; if (typeof content !== "string") continue; output += content; stream?.(content); + + // Collect the final result to check for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunk); + } else { + finalResult = chunk; + } } stream?.(null); - const { totalTokens: prompt_tokens } = await geminiPro.countTokens([ - input, - system ?? 
"", - ...history.map(({ content }) => content), - ]); - - const { totalTokens: completion_tokens } = - await geminiPro.countTokens(output); + // Check for usage metadata from LangChain first (more accurate, includes thinking tokens) + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug("chatGemini successful (using usage_metadata)", { + input_tokens, + output_tokens, + total_tokens, + usage_metadata, // Log full metadata to see what other fields might be available + }); - log.debug("chatGemini successful", { prompt_tokens, completion_tokens }); + // For now, return the standard ChatOutput format + // TODO: Consider extending ChatOutput interface to include thinking_tokens if available + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // Fallback to manual token counting using the actual model (not hardcoded) + const tokenCountingModel: GenerativeModel = this.genAI.getGenerativeModel( + { + model: modelName, + }, + ); + + const { totalTokens: prompt_tokens } = + await tokenCountingModel.countTokens([ + input, + system ?? "", + ...history.map(({ content }) => content), + ]); + + const { totalTokens: completion_tokens } = + await tokenCountingModel.countTokens(output); + + log.debug("chatGemini successful (using manual counting)", { + prompt_tokens, + completion_tokens, + }); - return { - output, - total_tokens: prompt_tokens + completion_tokens, - completion_tokens, - prompt_tokens, - }; + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; + } } } diff --git a/src/packages/server/llm/google-lc.ts b/src/packages/server/llm/google-lc.ts index 657dd3cb403..6c0537465b7 100644 --- a/src/packages/server/llm/google-lc.ts +++ b/src/packages/server/llm/google-lc.ts @@ -1,8 +1,10 @@ +import { AIMessageChunk } from "@langchain/core/messages"; import { ChatPromptTemplate, MessagesPlaceholder, } from "@langchain/core/prompts"; import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings"; @@ -94,24 +96,57 @@ export async function evaluateGoogleGenAILC( const chunks = await chainWithHistory.stream({ input }); + let finalResult: AIMessageChunk | undefined; let output = ""; for await (const chunk of chunks) { const { content } = chunk; if (typeof content !== "string") continue; output += content; opts.stream?.(content); + + // Collect the final result to check for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunk); + } else { + finalResult = chunk; + } } opts.stream?.(null); - // we use that GPT3 tokenizer to get an approximate number of tokens - const prompt_tokens = numTokens(input) + historyTokens; - const completion_tokens = numTokens(output); - - return { - output, - total_tokens: prompt_tokens + completion_tokens, - completion_tokens, - prompt_tokens, - }; + // Check for usage metadata from LangChain first (more accurate) + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug("evaluateGoogleGenAILC successful (using usage_metadata)", { + input_tokens, + 
output_tokens, + total_tokens, + }); + + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // Fallback to manual token counting (approximation using GPT-3 tokenizer) + const prompt_tokens = numTokens(input) + historyTokens; + const completion_tokens = numTokens(output); + + log.debug("evaluateGoogleGenAILC successful (using manual counting)", { + prompt_tokens, + completion_tokens, + }); + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; + } } diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 8e398f4b585..32fdeb574d7 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -48,6 +48,7 @@ import type { } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { evaluateAnthropic } from "./anthropic"; +import { evaluateWithLangChain } from "./evaluate-lc"; import { callChatGPTAPI } from "./call-llm"; import { getClient } from "./client"; import { evaluateCustomOpenAI } from "./custom-openai"; @@ -64,6 +65,10 @@ const DEBUG_THROW_LLM_ERROR = process.env.DEBUG_THROW_LLM_ERROR === "true"; const log = getLogger("llm"); +// Feature flag to use the new unified LangChain implementation +export const USE_NEWER_LC_IMPL = + (process.env.COCALC_LLM_USE_NEWER_LC_IMPL ?? "true") === "true"; + async function getDefaultModel(): Promise { return ((await getServerSettings()).default_llm ?? DEFAULT_MODEL) as LanguageModel; @@ -178,31 +183,52 @@ async function evaluateImpl({ const { output, total_tokens, prompt_tokens, completion_tokens } = await (async () => { - if (isUserDefinedModel(model)) { - return await evaluateUserDefinedLLM(params, account_id); - } else if (isOllamaLLM(model)) { - return await evaluateOllama(params); - } else if (isCustomOpenAI(model)) { - return await evaluateCustomOpenAI(params); - } else if (isMistralModel(model)) { - return await evaluateMistral(params); - } else if (isAnthropicModel(model)) { - return await evaluateAnthropic(params); - } else if (isGoogleModel(model)) { - const client = await getClient(model); - if (!(client instanceof GoogleGenAIClient)) { - throw new Error("Wrong client. This should never happen. [GenAI]"); + if (USE_NEWER_LC_IMPL) { + // Use the new unified LangChain implementation + if (isUserDefinedModel(model)) { + return await evaluateUserDefinedLLM(params, account_id); + } else if (isOllamaLLM(model)) { + return await evaluateOllama(params); + } else if ( + isCustomOpenAI(model) || + isMistralModel(model) || + isAnthropicModel(model) || + isGoogleModel(model) || + isOpenAIModel(model) + ) { + // Use unified implementation for LangChain-based providers + return await evaluateWithLangChain(params); + } else { + throw new Error(`Unable to handel model '${model}'.`); } - return await evaluateGoogleGenAI({ ...params, client }); - } else if (isOpenAIModel(model)) { - return await evaluateOpenAILC(params); } else { - throw new Error(`Unable to handel model '${model}'.`); - // const client = await getClient(model); - // if (!(client instanceof OpenAI)) { - // throw new Error("Wrong client. This should never happen. 
[OpenAI]"); - // } - // return await evaluateOpenAI({ ...params, client }); + // Use the original file-by-file implementation + if (isUserDefinedModel(model)) { + return await evaluateUserDefinedLLM(params, account_id); + } else if (isOllamaLLM(model)) { + return await evaluateOllama(params); + } else if (isCustomOpenAI(model)) { + return await evaluateCustomOpenAI(params); + } else if (isMistralModel(model)) { + return await evaluateMistral(params); + } else if (isAnthropicModel(model)) { + return await evaluateAnthropic(params); + } else if (isGoogleModel(model)) { + const client = await getClient(model); + if (!(client instanceof GoogleGenAIClient)) { + throw new Error("Wrong client. This should never happen. [GenAI]"); + } + return await evaluateGoogleGenAI({ ...params, client }); + } else if (isOpenAIModel(model)) { + return await evaluateOpenAILC(params); + } else { + throw new Error(`Unable to handel model '${model}'.`); + // const client = await getClient(model); + // if (!(client instanceof OpenAI)) { + // throw new Error("Wrong client. This should never happen. [OpenAI]"); + // } + // return await evaluateOpenAI({ ...params, client }); + } } })(); diff --git a/src/packages/server/llm/mistral.ts b/src/packages/server/llm/mistral.ts index c50de74afe9..7a6c5ec82fa 100644 --- a/src/packages/server/llm/mistral.ts +++ b/src/packages/server/llm/mistral.ts @@ -1,8 +1,10 @@ +import { AIMessageChunk } from "@langchain/core/messages"; import { ChatPromptTemplate, MessagesPlaceholder, } from "@langchain/core/prompts"; import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { concat } from "@langchain/core/utils/stream"; import { ChatMistralAI, ChatMistralAIInput } from "@langchain/mistralai"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings"; @@ -86,24 +88,57 @@ export async function evaluateMistral( const chunks = await chainWithHistory.stream({ input }); + let finalResult: AIMessageChunk | undefined; let output = ""; for await (const chunk of chunks) { const { content } = chunk; if (typeof content !== "string") continue; output += content; opts.stream?.(content); + + // Collect the final result to check for usage metadata + if (finalResult) { + finalResult = concat(finalResult, chunk); + } else { + finalResult = chunk; + } } opts.stream?.(null); - // we use that GPT3 tokenizer to get an approximate number of tokens - const prompt_tokens = numTokens(input) + historyTokens; - const completion_tokens = numTokens(output); - - return { - output, - total_tokens: prompt_tokens + completion_tokens, - completion_tokens, - prompt_tokens, - }; + // Check for usage metadata from LangChain first (more accurate) + const usage_metadata = finalResult?.usage_metadata; + log.debug("usage_metadata", usage_metadata); + + if (usage_metadata) { + const { input_tokens, output_tokens, total_tokens } = usage_metadata; + log.debug("evaluateMistral successful (using usage_metadata)", { + input_tokens, + output_tokens, + total_tokens, + }); + + return { + output, + total_tokens, + completion_tokens: output_tokens, + prompt_tokens: input_tokens, + }; + } else { + // Fallback to manual token counting (approximation using GPT-3 tokenizer) + const prompt_tokens = numTokens(input) + historyTokens; + const completion_tokens = numTokens(output); + + log.debug("evaluateMistral successful (using manual counting)", { + prompt_tokens, + completion_tokens, + }); + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + 
completion_tokens, + prompt_tokens, + }; + } } diff --git a/src/packages/server/llm/ollama.ts b/src/packages/server/llm/ollama.ts index ffff2850689..50321a038cf 100644 --- a/src/packages/server/llm/ollama.ts +++ b/src/packages/server/llm/ollama.ts @@ -44,9 +44,11 @@ export async function evaluateOllama( const ollama = client ?? (await getOllama(model)); + const historyMessagesKey = "history"; + const prompt = ChatPromptTemplate.fromMessages([ ["system", system ?? ""], - new MessagesPlaceholder("chat_history"), + new MessagesPlaceholder(historyMessagesKey), ["human", "{input}"], ]); @@ -58,11 +60,10 @@ export async function evaluateOllama( runnable: chain, config: { configurable: { sessionId: "ignored" } }, inputMessagesKey: "input", - historyMessagesKey: "chat_history", + historyMessagesKey, getMessageHistory: async () => { - const { messageHistory, tokens } = await transformHistoryToMessages( - history, - ); + const { messageHistory, tokens } = + await transformHistoryToMessages(history); historyTokens = tokens; return messageHistory; }, diff --git a/src/packages/server/llm/openai-lc.ts b/src/packages/server/llm/openai-lc.ts index 85dcd915923..3ad0de0ff72 100644 --- a/src/packages/server/llm/openai-lc.ts +++ b/src/packages/server/llm/openai-lc.ts @@ -55,8 +55,8 @@ export async function evaluateOpenAILC( // As of Jan 2025: reasoning models (o1) do not support streaming // https://platform.openai.com/docs/guides/reasoning/ - const isO1 = model != "o1-mini" && model != "o1"; - const streaming = stream != null && isO1; + const isO1 = model.includes("o1"); + const streaming = stream != null && !isO1; // This is also quite big -- only uncomment when developing and needing this. // log.debug("evaluateOpenAILC", { @@ -75,10 +75,10 @@ export async function evaluateOpenAILC( ...params, maxTokens, streaming, - }).bind(isO1 ? {} : { stream_options: { include_usage: true } }); + }).withConfig(streaming ? { stream_options: { include_usage: true } } : {}); const prompt = ChatPromptTemplate.fromMessages([ - [isO1 ? "developer" : "system", system ?? ""], + ["system", system ?? 
""], new MessagesPlaceholder("history"), ["human", "{input}"], ]); diff --git a/src/packages/server/llm/test/00.test.ts b/src/packages/server/llm/test/00.test.ts deleted file mode 100644 index 5834de3bed6..00000000000 --- a/src/packages/server/llm/test/00.test.ts +++ /dev/null @@ -1,154 +0,0 @@ -// import { log } from "console"; - -import getPool, { initEphemeralDatabase } from "@cocalc/database/pool"; -import { - AnthropicModel, - LanguageModelCore, - // GoogleModel, - MistralModel, - isAnthropicModel, - isGoogleModel, - isMistralModel, - isOpenAIModel, -} from "@cocalc/util/db-schema/llm-utils"; -// import { evaluateMistral } from "../mistral"; -import { evaluateAnthropic } from "../anthropic"; -import { GoogleGenAIClient } from "../google-genai-client"; -import { evaluateMistral } from "../mistral"; -import { evaluateOpenAILC } from "../openai-lc"; -import { enableModels, setupAPIKeys, test_llm } from "./shared"; -import { evaluateGoogleGenAI } from ".."; -import { getClient } from "../client"; - -beforeAll(async () => { - await initEphemeralDatabase(); - await setupAPIKeys(); - await enableModels(); -}, 15000); - -afterAll(async () => { - await getPool().end(); -}); - -const QUERY = { - input: "What's 99 + 1?", - system: "Reply only the value.", -} as const; - -function checkAnswer(answer) { - const { output, total_tokens, completion_tokens, prompt_tokens } = answer; - expect(output).toContain("100"); - expect(total_tokens).toEqual(prompt_tokens + completion_tokens); - expect(prompt_tokens).toBeGreaterThan(5); - expect(completion_tokens).toBeGreaterThan(0); -} - -async function llmOpenAI(model: LanguageModelCore) { - if (!isOpenAIModel(model)) { - throw new Error(`model: ${model} is not an OpenAI model`); - } - - const answer = await evaluateOpenAILC({ - model, - ...QUERY, - }); - - checkAnswer(answer); -} - -async function llmGoogle(model: LanguageModelCore) { - if (!isGoogleModel(model)) { - throw new Error(`model: ${model} is not a Google model`); - } - const client = (await getClient(model)) as GoogleGenAIClient; - const answer = await evaluateGoogleGenAI({ - model, - client, - ...QUERY, - }); - checkAnswer(answer); -} - -// write a test in jest that fails -test_llm("openai")("OpenAI", () => { - test("gpt3.5 works", async () => { - llmOpenAI("gpt-3.5-turbo"); - }); - test("gpt 4 works", async () => { - llmOpenAI("gpt-4"); - }); - test("gpt 4 turbo works", async () => { - llmOpenAI("gpt-4-turbo-8k"); - }); - test("gpt 4 omni works", async () => { - llmOpenAI("gpt-4o-8k"); - }); - test("gpt 4o mini works", async () => { - llmOpenAI("gpt-4o-mini-8k"); - }); - test("gpt 4.1 works", async () => { - llmOpenAI("gpt-4.1"); - }); - test("gpt 4.1 mini works", async () => { - llmOpenAI("gpt-4.1-mini"); - }); - - // test("gpt o1", async () => { - // llmOpenAI("o1-8k"); - // }); - // test("gpt o1 mini works", async () => { - // llmOpenAI("o1-mini-8k"); - // }); -}); - -// ATTN: does not work everywhere around, geolocation matters -test_llm("google")("Google GenAI", () => { - test("gemini 1.5 pro works", async () => { - llmGoogle("gemini-1.5-pro"); - }); - test("gemini 2.0 flash works", async () => { - llmGoogle("gemini-2.0-flash-8k"); - }); - test("gemini 2.0 flash lite works", async () => { - llmGoogle("gemini-2.0-flash-lite-8k"); - }); -}); - -test_llm("mistralai")("Mistral AI", () => { - const model: MistralModel = "mistral-small-latest"; - - test("model", () => { - expect(isMistralModel(model)).toBe(true); - }); - - // segaults – no clue why. 
happens with version 0.2.0 - test.skip("basics", async () => { - const answer = await evaluateMistral({ model, ...QUERY }); - checkAnswer(answer); - }); -}); - -test_llm("anthropic")("Anthropic", () => { - const haiku: AnthropicModel = "claude-3-haiku"; - const sonnet: AnthropicModel = "claude-3-5-sonnet-4k"; - const opus: AnthropicModel = "claude-3-opus-8k"; - - test("model", () => { - expect(isAnthropicModel(haiku)).toBe(true); - }); - - test("haiku", async () => { - const answer = await evaluateAnthropic({ model: haiku, ...QUERY }); - checkAnswer(answer); - }); - - test("sonnet", async () => { - const answer = await evaluateAnthropic({ model: sonnet, ...QUERY }); - checkAnswer(answer); - }); - - test("opus", async () => { - const answer = await evaluateAnthropic({ model: opus, ...QUERY }); - checkAnswer(answer); - }); -}); diff --git a/src/packages/server/llm/test/models.test.ts b/src/packages/server/llm/test/models.test.ts new file mode 100644 index 00000000000..6c692352c23 --- /dev/null +++ b/src/packages/server/llm/test/models.test.ts @@ -0,0 +1,505 @@ +// import { log } from "console"; + +import getPool, { initEphemeralDatabase } from "@cocalc/database/pool"; +import { + AnthropicModel, + LanguageModelCore, + // GoogleModel, + MistralModel, + isAnthropicModel, + isGoogleModel, + isMistralModel, + isOpenAIModel, + UserDefinedLLM, + toUserLLMModelName, +} from "@cocalc/util/db-schema/llm-utils"; +import { evaluateGoogleGenAI } from ".."; +import { evaluateAnthropic } from "../anthropic"; +import { getClient } from "../client"; +import createAccount from "../../accounts/create-account"; +import { db } from "@cocalc/database"; +import { callback2 } from "@cocalc/util/async-utils"; +import { OTHER_SETTINGS_USERDEFINED_LLM } from "@cocalc/util/db-schema/defaults"; +import { uuid } from "@cocalc/util/misc"; +import { evaluateWithLangChain } from "../evaluate-lc"; +import { GoogleGenAIClient } from "../google-genai-client"; +import { USE_NEWER_LC_IMPL } from "../index"; +import { evaluateMistral } from "../mistral"; +import { evaluateOpenAILC } from "../openai-lc"; +import { evaluateUserDefinedLLM } from "../user-defined"; +import { enableModels, setupAPIKeys, test_llm } from "./shared"; + +// sometimes (flaky case) they take more than 10s to even start a response +const LLM_TIMEOUT = 15_000; + +beforeAll(async () => { + await initEphemeralDatabase(); + await setupAPIKeys(); + await enableModels(); +}, 15000); + +afterAll(async () => { + await getPool().end(); +}); + +const QUERY = { + input: "What's 99 + 1?", + system: "Reply only the value.", +} as const; + +function checkAnswer(answer) { + const { output, total_tokens, completion_tokens, prompt_tokens } = answer; + expect(output).toContain("100"); + // total tokens is more than that sume for "thinking" models like gemini 2.5 + // because thinking tokens are not part of this + expect(total_tokens).toBeGreaterThanOrEqual( + prompt_tokens + completion_tokens, + ); + expect(prompt_tokens).toBeGreaterThan(5); + expect(completion_tokens).toBeGreaterThan(0); +} + +async function llmOpenAI(model: LanguageModelCore) { + if (!isOpenAIModel(model)) { + throw new Error(`model: ${model} is not an OpenAI model`); + } + + const answer = USE_NEWER_LC_IMPL + ? 
await evaluateWithLangChain({ + model, + ...QUERY, + }) + : await evaluateOpenAILC({ + model, + ...QUERY, + }); + + checkAnswer(answer); +} + +async function llmGoogle(model: LanguageModelCore) { + if (!isGoogleModel(model)) { + throw new Error(`model: ${model} is not a Google model`); + } + + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ + model, + ...QUERY, + }) + : await (async () => { + const client = (await getClient(model)) as GoogleGenAIClient; + return await evaluateGoogleGenAI({ + model, + client, + ...QUERY, + }); + })(); + + checkAnswer(answer); +} + +// write a test in jest that fails +test_llm("openai")("OpenAI", () => { + test( + "gpt3.5 works", + async () => { + await llmOpenAI("gpt-3.5-turbo"); + }, + LLM_TIMEOUT, + ); + test( + "gpt 4 works", + async () => { + await llmOpenAI("gpt-4"); + }, + LLM_TIMEOUT, + ); + test( + "gpt 4 turbo works", + async () => { + await llmOpenAI("gpt-4-turbo-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gpt 4 omni works", + async () => { + await llmOpenAI("gpt-4o-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gpt 4o mini works", + async () => { + await llmOpenAI("gpt-4o-mini-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gpt 4.1 works", + async () => { + await llmOpenAI("gpt-4.1"); + }, + LLM_TIMEOUT, + ); + test( + "4.1 mini works", + async () => { + await llmOpenAI("gpt-4.1-mini"); + }, + LLM_TIMEOUT, + ); + + test("o1", async () => { + await llmOpenAI("o1-8k"); + }); + + test("o1 mini works", async () => { + await llmOpenAI("o1-mini-8k"); + }); +}); + +// ATTN: does not work everywhere around, geolocation matters +test_llm("google")("Google GenAI", () => { + test( + "gemini 1.5 pro works", + async () => { + await llmGoogle("gemini-1.5-pro"); + }, + LLM_TIMEOUT, + ); + test( + "gemini 2.0 flash works", + async () => { + await llmGoogle("gemini-2.0-flash-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gemini 2.0 flash lite works", + async () => { + await llmGoogle("gemini-2.0-flash-lite-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gemini 2.5 flash works", + async () => { + await llmGoogle("gemini-2.5-flash-8k"); + }, + LLM_TIMEOUT, + ); + test( + "gemini 2.5 pro works", + async () => { + await llmGoogle("gemini-2.5-pro-8k"); + }, + LLM_TIMEOUT, + ); +}); + +test_llm("mistralai")("Mistral AI", () => { + const small: MistralModel = "mistral-small-latest"; + const medium: MistralModel = "mistral-medium-latest"; + const large: MistralModel = "mistral-large-latest"; + + test("model", () => { + expect(isMistralModel(small)).toBe(true); + expect(isMistralModel(medium)).toBe(true); + expect(isMistralModel(large)).toBe(true); + }); + + test( + "small", + async () => { + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ model: small, ...QUERY }) + : await evaluateMistral({ model: small, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + test( + "medium", + async () => { + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ model: medium, ...QUERY }) + : await evaluateMistral({ model: medium, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + test( + "large", + async () => { + const answer = USE_NEWER_LC_IMPL + ? 
await evaluateWithLangChain({ model: large, ...QUERY }) + : await evaluateMistral({ model: large, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); +}); + +test_llm("anthropic")("Anthropic", () => { + const haiku: AnthropicModel = "claude-3-5-haiku-8k"; + const sonnet: AnthropicModel = "claude-4-sonnet-8k"; + const opus: AnthropicModel = "claude-4-opus-8k"; + + test("model", () => { + expect(isAnthropicModel(haiku)).toBe(true); + expect(isAnthropicModel(sonnet)).toBe(true); + expect(isAnthropicModel(opus)).toBe(true); + }); + + test( + "haiku", + async () => { + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ model: haiku, ...QUERY }) + : await evaluateAnthropic({ model: haiku, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + test( + "sonnet", + async () => { + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ model: sonnet, ...QUERY }) + : await evaluateAnthropic({ model: sonnet, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + test( + "opus", + async () => { + const answer = USE_NEWER_LC_IMPL + ? await evaluateWithLangChain({ model: opus, ...QUERY }) + : await evaluateAnthropic({ model: opus, ...QUERY }); + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); +}); + +// User-defined LLM tests +describe("User-defined LLMs", () => { + const account_id = uuid(); + let accountCreated = false; + + beforeAll(async () => { + // Create test account only once for the entire describe block + if (!accountCreated) { + await createAccount({ + email: `test-${account_id}@example.com`, + password: "testpass123", + firstName: "Test", + lastName: "User", + account_id, + }); + accountCreated = true; + } + + // Enable user-defined LLMs server setting + await callback2(db().set_server_setting, { + name: "user_defined_llm", + value: "yes", + readonly: true, + }); + }); + + async function createUserDefinedLLMConfig(configs: UserDefinedLLM[]) { + const userDefinedLLMJson = JSON.stringify(configs); + const pool = getPool(); + await pool.query( + `UPDATE accounts SET other_settings = jsonb_set( + COALESCE(other_settings, '{}'::jsonb), + '{${OTHER_SETTINGS_USERDEFINED_LLM}}', + to_jsonb($1::text) + ) WHERE account_id = $2`, + [userDefinedLLMJson, account_id], + ); + } + + // Test user-defined OpenAI model + test( + "user-defined OpenAI model works", + async () => { + const openaiKey = process.env.COCALC_TEST_OPENAI_KEY; + if (!openaiKey) { + console.log("Skipping user-defined OpenAI test - no API key"); + return; + } + + const config: UserDefinedLLM = { + id: 1, + service: "openai", + display: "Test GPT-4o Mini", + endpoint: "https://api.openai.com/v1", + model: "gpt-4o-mini", + apiKey: openaiKey, + }; + + await createUserDefinedLLMConfig([config]); + + const userModel = toUserLLMModelName(config); + const answer = await evaluateUserDefinedLLM( + { + model: userModel, + ...QUERY, + }, + account_id, + ); + + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + // Test user-defined Google model + test( + "user-defined Google model works", + async () => { + const googleKey = process.env.COCALC_TEST_GOOGLE_GENAI_KEY; + if (!googleKey) { + console.log("Skipping user-defined Google test - no API key"); + return; + } + + const config: UserDefinedLLM = { + id: 2, + service: "google", + display: "Test Gemini Flash", + endpoint: "", + model: "gemini-1.5-flash", + apiKey: googleKey, + }; + + await createUserDefinedLLMConfig([config]); + + const userModel = toUserLLMModelName(config); + const answer = await evaluateUserDefinedLLM( + { + model: userModel, + 
...QUERY, + }, + account_id, + ); + + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + // Test user-defined Anthropic model + test( + "user-defined Anthropic model works", + async () => { + const anthropicKey = process.env.COCALC_TEST_ANTHROPIC_KEY; + if (!anthropicKey) { + console.log("Skipping user-defined Anthropic test - no API key"); + return; + } + + const config: UserDefinedLLM = { + id: 3, + service: "anthropic", + display: "claude-3-5-haiku-latest", + endpoint: "", + model: "claude-3-5-haiku-latest", + apiKey: anthropicKey, + }; + + await createUserDefinedLLMConfig([config]); + + const userModel = toUserLLMModelName(config); + const answer = await evaluateUserDefinedLLM( + { + model: userModel, + ...QUERY, + }, + account_id, + ); + + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + // Test user-defined Mistral model + test( + "user-defined Mistral model works", + async () => { + const mistralKey = process.env.COCALC_TEST_MISTRAL_AI_KEY; + if (!mistralKey) { + console.log("Skipping user-defined Mistral test - no API key"); + return; + } + + const config: UserDefinedLLM = { + id: 4, + service: "mistralai", + display: "Test Mistral Small", + endpoint: "", + model: "mistral-small-latest", + apiKey: mistralKey, + }; + + await createUserDefinedLLMConfig([config]); + + const userModel = toUserLLMModelName(config); + const answer = await evaluateUserDefinedLLM( + { + model: userModel, + ...QUERY, + }, + account_id, + ); + + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); + + // Test user-defined custom OpenAI model + test( + "user-defined custom OpenAI model works", + async () => { + const openaiKey = process.env.COCALC_TEST_OPENAI_KEY; + if (!openaiKey) { + console.log("Skipping user-defined custom OpenAI test - no API key"); + return; + } + + const config: UserDefinedLLM = { + id: 5, + service: "custom_openai", + display: "Test Custom GPT-4o", + endpoint: "https://api.openai.com/v1", + model: "gpt-4o", + apiKey: openaiKey, + }; + + await createUserDefinedLLMConfig([config]); + + const userModel = toUserLLMModelName(config); + const answer = await evaluateUserDefinedLLM( + { + model: userModel, + ...QUERY, + }, + account_id, + ); + + checkAnswer(answer); + }, + LLM_TIMEOUT, + ); +}); diff --git a/src/packages/server/llm/user-defined.ts b/src/packages/server/llm/user-defined.ts index 3e846f0624f..c4670f56a06 100644 --- a/src/packages/server/llm/user-defined.ts +++ b/src/packages/server/llm/user-defined.ts @@ -16,11 +16,12 @@ import { } from "@cocalc/util/db-schema/llm-utils"; import { isValidUUID, unreachable } from "@cocalc/util/misc"; import type { History, Stream } from "@cocalc/util/types/llm"; -import { evaluateAnthropic } from "./anthropic"; import { evaluateCustomOpenAI } from "./custom-openai"; -import { evaluateGoogleGenAILC } from "./google-lc"; -import { evaluateMistral } from "./mistral"; import { evaluateOllama } from "./ollama"; +// import { evaluateWithLangChain } from "./evaluate-lc"; +import { evaluateAnthropic } from "./anthropic"; +import { evaluateMistral } from "./mistral"; +import { evaluateGoogleGenAILC } from "./google-lc"; import { evaluateOpenAILC } from "./openai-lc"; const log = getLogger("llm:userdefined"); @@ -65,6 +66,8 @@ export async function evaluateUserDefinedLLM( // and then construct the corresponding client (maybe with a use provided API key) // and call the appropriate evaluation function. For that, it mimics how the llm framework // usually calls an LLM. + // NOTE: evaluateWithLangChain "could" work after further refactoring. 
In particular, its + // getProviderConfig must be enhanced with a generalized way to configure based on provider, not model name const { service, endpoint, apiKey } = conf; switch (service) { case "custom_openai": { @@ -107,24 +110,56 @@ export async function evaluateUserDefinedLLM( { ...opts, model: um.model, apiKey: conf.apiKey }, "user", ); + // return await evaluateWithLangChain( + // { + // ...opts, + // model: um.model, + // apiKey: conf.apiKey, + // }, + // "user", + // ); case "mistralai": return await evaluateMistral( { ...opts, model: um.model, apiKey: conf.apiKey }, "user", ); + // return await evaluateWithLangChain( + // { + // ...opts, + // model: um.model, + // apiKey: conf.apiKey, + // }, + // "user", + // ); case "google": return await evaluateGoogleGenAILC( { ...opts, model: um.model, apiKey: conf.apiKey }, "user", ); + // return await evaluateWithLangChain( + // { + // ...opts, + // model: um.model, + // apiKey: conf.apiKey, + // }, + // "user", + // ); case "openai": return await evaluateOpenAILC( { ...opts, model: um.model, apiKey: conf.apiKey }, "user", ); + // return await evaluateWithLangChain( + // { + // ...opts, + // model: um.model, + // apiKey: conf.apiKey, + // }, + // "user", + // ); default: unreachable(service); diff --git a/src/packages/server/package.json b/src/packages/server/package.json index fd46d02793b..69809b69c05 100644 --- a/src/packages/server/package.json +++ b/src/packages/server/package.json @@ -55,12 +55,12 @@ "@google-cloud/storage-transfer": "^3.3.0", "@google/generative-ai": "^0.14.0", "@isaacs/ttlcache": "^1.4.1", - "@langchain/anthropic": "^0.3.18", - "@langchain/core": "^0.3.46", - "@langchain/google-genai": "^0.2.4", - "@langchain/mistralai": "^0.2.0", - "@langchain/ollama": "^0.2.0", - "@langchain/openai": "^0.5.5", + "@langchain/anthropic": "^0.3.24", + "@langchain/core": "^0.3.64", + "@langchain/google-genai": "^0.2.15", + "@langchain/mistralai": "^0.2.1", + "@langchain/ollama": "^0.2.3", + "@langchain/openai": "^0.6.1", "@node-saml/passport-saml": "^5.0.1", "@passport-js/passport-twitter": "^1.0.8", "@passport-next/passport-google-oauth2": "^1.0.0", @@ -97,7 +97,7 @@ "nanoid": "^3.3.8", "node-zendesk": "^5.0.13", "nodemailer": "^6.9.16", - "openai": "^4.95.1", + "openai": "^4.104.0", "parse-domain": "^5.0.0", "passport": "^0.6.0", "passport-activedirectory": "^1.0.4", diff --git a/src/packages/util/db-schema/llm-utils.test.ts b/src/packages/util/db-schema/llm-utils.test.ts index dd9c18eb08d..54fb8289ee7 100644 --- a/src/packages/util/db-schema/llm-utils.test.ts +++ b/src/packages/util/db-schema/llm-utils.test.ts @@ -125,9 +125,9 @@ describe("llm", () => { expect(getModel(DEFAULT_MODEL)).toEqual(DEFAULT_MODEL); expect(getModel("mistral-medium-latest")).toEqual(DEFAULT_MODEL); expect(getModel("mistral-large-latest")).toEqual("mistral-large-latest"); - expect(getModel("claude-3-haiku-8k")).toEqual("claude-3-haiku-8k"); + expect(getModel("claude-3-5-haiku-8k")).toEqual("claude-3-5-haiku-8k"); // anthropic service disabled - expect(getModel("claude-3-haiku-8k", "anthropic")).toEqual(DEFAULT_MODEL); + expect(getModel("claude-3-5-haiku-8k", "anthropic")).toEqual(DEFAULT_MODEL); // ollama expect(getModel("ollama-foo")).toEqual(DEFAULT_MODEL); expect(getModel("ollama-phi3")).toEqual("ollama-phi3"); @@ -142,8 +142,8 @@ describe("llm", () => { "user-openai-gpt-3.5-turbo", ); // it's ok to use a model if disabled by the admin, since it's their key - expect(getModel("user-anthropic-claude-3-haiku-8k", "anthropic")).toEqual( - 
"user-anthropic-claude-3-haiku-8k", + expect(getModel("user-anthropic-claude-3-5-haiku-8k", "anthropic")).toEqual( + "user-anthropic-claude-3-5-haiku-8k", ); // meaningless user service expect(getModel("user-baz-delta99")).toEqual(DEFAULT_MODEL); diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts index 33dbfeb8cee..0542461cfbb 100644 --- a/src/packages/util/db-schema/llm-utils.ts +++ b/src/packages/util/db-schema/llm-utils.ts @@ -151,10 +151,13 @@ export function isMistralModel(model: unknown): model is MistralModel { // $ curl -s "https://generativelanguage.googleapis.com/v1beta/models?key=$GOOGLE_GENAI" | jq export const GOOGLE_MODELS = [ "gemini-1.5-flash-8k", // introduced 2024-05-15 + "gemini-1.5-flash", // for user defined models "gemini-pro", // Discontinued Feb'25. Keep it to avoid breaking old references! "gemini-1.0-ultra", // hangs "gemini-1.5-pro-8k", // works now with langchaing "gemini-1.5-pro", // works now with langchaing + "gemini-2.5-flash-8k", + "gemini-2.5-pro-8k", "gemini-2.0-flash-8k", "gemini-2.0-flash-lite-8k", ] as const; @@ -168,33 +171,38 @@ export const GOOGLE_MODEL_TO_ID: Partial<{ [m in GoogleModel]: string }> = { "gemini-1.5-flash-8k": "gemini-1.5-flash-latest", "gemini-2.0-flash-8k": "gemini-2.0-flash", "gemini-2.0-flash-lite-8k": "gemini-2.0-flash-lite", + "gemini-2.5-flash-8k": "gemini-2.5-flash", + "gemini-2.5-pro-8k": "gemini-2.5-pro", } as const; -// https://docs.anthropic.com/claude/docs/models-overview -- stable names for the modesl ... +// https://docs.anthropic.com/en/docs/about-claude/models/overview -- stable names for the modesl ... export const ANTHROPIC_MODELS = [ "claude-3-5-sonnet", "claude-3-5-sonnet-4k", // added 2024-06-24 + "claude-3-5-haiku-8k", "claude-3-haiku", "claude-3-haiku-8k", // limited context window, offered for free "claude-3-sonnet", "claude-3-sonnet-4k", // limited context window, offered for free - "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending "claude-3-opus", + "claude-3-opus-8k", // same issue as the large GPT models, limit the context window to limit spending + "claude-4-sonnet-8k", + "claude-4-opus-8k", ] as const; -const CLAUDE_SONNET_VERSION = "20240229"; -const CLAUDE_HAIKU_VERSION = "20240307"; -const CLAUDE_OPUS_VERSION = "20240229"; -const CLAUDE_SONNET_3_5_VERSION = "20240620"; -// ... 
and we add a version number (there is no "*-latest") when dispatching on the backend -export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string } = { - "claude-3-sonnet-4k": CLAUDE_SONNET_VERSION, - "claude-3-opus": CLAUDE_OPUS_VERSION, - "claude-3-opus-8k": CLAUDE_OPUS_VERSION, - "claude-3-sonnet": CLAUDE_SONNET_VERSION, - "claude-3-5-sonnet": CLAUDE_SONNET_3_5_VERSION, - "claude-3-5-sonnet-4k": CLAUDE_SONNET_3_5_VERSION, - "claude-3-haiku": CLAUDE_HAIKU_VERSION, - "claude-3-haiku-8k": CLAUDE_HAIKU_VERSION, +// https://docs.anthropic.com/en/docs/about-claude/models/overview#model-aliases +// if it points to null, the model is no longer supported +export const ANTHROPIC_VERSION: { [name in AnthropicModel]: string | null } = { + "claude-3-5-sonnet": "claude-3-5-sonnet-latest", + "claude-3-5-sonnet-4k": "claude-3-5-sonnet-latest", + "claude-3-5-haiku-8k": "claude-3-5-haiku-latest", + "claude-3-haiku": "claude-3-haiku-20240307", + "claude-3-haiku-8k": "claude-3-haiku-20240307", + "claude-4-sonnet-8k": "claude-sonnet-4-0", + "claude-4-opus-8k": "claude-opus-4-0", + "claude-3-sonnet": null, + "claude-3-sonnet-4k": null, + "claude-3-opus": null, + "claude-3-opus-8k": null, } as const; export const ANTHROPIC_PREFIX = "anthropic-"; export type AnthropicModel = (typeof ANTHROPIC_MODELS)[number]; @@ -237,7 +245,9 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: { m === "gpt-4o-8k" || m === "gpt-4o-mini-8k" || m === "gpt-4.1" || - m === "gpt-4.1-mini", + m === "gpt-4.1-mini" || + m === "o1-mini-8k" || + m === "o1-8k", // ATTN: there is code for o1 and o1-mini, but it does not work yet. // The API changed, there is no support for streaming, and it took @@ -248,18 +258,19 @@ export const USER_SELECTABLE_LLMS_BY_VENDOR: { google: GOOGLE_MODELS.filter( (m) => // we only enable 1.5 pro and 1.5 flash with a limited context window. 
- m === "gemini-1.5-pro-8k" || + //m === "gemini-1.5-pro-8k" || //m === "gemini-1.5-flash-8k" || - m === "gemini-2.0-flash-8k" || - m === "gemini-2.0-flash-lite-8k", + m === "gemini-2.0-flash-lite-8k" || + m === "gemini-2.5-flash-8k" || + m === "gemini-2.5-pro-8k", ), mistralai: MISTRAL_MODELS.filter((m) => m !== "mistral-medium-latest"), anthropic: ANTHROPIC_MODELS.filter((m) => { // we show opus and the context restricted models (to avoid high costs) return ( - m === "claude-3-opus-8k" || - m === "claude-3-5-sonnet-4k" || - m === "claude-3-haiku-8k" + m === "claude-3-5-haiku-8k" || + m === "claude-4-sonnet-8k" || + m === "claude-4-opus-8k" ); }), ollama: [], // this is empty, because these models are not hardcoded @@ -601,7 +612,7 @@ export function service2model_core( } // NOTE: do not use this – instead use server_settings.default_llm -export const DEFAULT_MODEL: LanguageModel = "gemini-2.0-flash-8k"; +export const DEFAULT_MODEL: LanguageModel = "gemini-2.5-flash-8k"; interface LLMVendor { name: LLMServiceName; @@ -737,21 +748,27 @@ export const LLM_USERNAMES: LLM2String = { "chat-bison-001": "PaLM 2", "gemini-pro": "Gemini 1.0 Pro", "gemini-1.0-ultra": "Gemini 1.0 Ultra", + "gemini-1.5-flash": "Gemini 1.5 Flash", "gemini-1.5-pro": "Gemini 1.5 Pro 1m", "gemini-1.5-pro-8k": "Gemini 1.5 Pro", "gemini-1.5-flash-8k": "Gemini 1.5 Flash", "gemini-2.0-flash-8k": "Gemini 2.0 Flash", "gemini-2.0-flash-lite-8k": "Gemini 2.0 Flash Lite", + "gemini-2.5-flash-8k": "Gemini 2.5 Flash", + "gemini-2.5-pro-8k": "Gemini 2.5 Pro", "mistral-small-latest": "Mistral AI Small", "mistral-medium-latest": "Mistral AI Medium", "mistral-large-latest": "Mistral AI Large", - "claude-3-haiku": "Claude 3 Haiku 200k", + "claude-3-haiku": "Claude 3 Haiku", "claude-3-haiku-8k": "Claude 3 Haiku", + "claude-3-5-haiku-8k": "Claude 3 Haiku", "claude-3-sonnet": "Claude 3 Sonnet 200k", "claude-3-sonnet-4k": "Claude 3 Sonnet", - "claude-3-5-sonnet": "Claude 3.5 Sonnet 200k", + "claude-3-5-sonnet": "Claude 3.5 Sonnet", "claude-3-5-sonnet-4k": "Claude 3.5 Sonnet", - "claude-3-opus": "Claude 3 Opus 200k", + "claude-4-sonnet-8k": "Claude 4 Sonnet", + "claude-4-opus-8k": "Claude 4 Opus", + "claude-3-opus": "Claude 3 Opus", "claude-3-opus-8k": "Claude 3 Opus", } as const; @@ -795,6 +812,7 @@ export const LLM_DESCR: LLM2String = { "Google's Gemini 1.0 Ultra Generative AI model (30k token context)", "gemini-1.5-pro": "Google's Gemini 1.5 Pro Generative AI model (1m token context)", + "gemini-1.5-flash": "Google's Gemini 1.5 Flash Generative AI model", "gemini-1.5-pro-8k": "Google's Gemini 1.5 Pro Generative AI model (8k token context)", "gemini-1.5-flash-8k": @@ -803,6 +821,10 @@ export const LLM_DESCR: LLM2String = { "Google's Gemini 2.0 Flash Generative AI model (8k token context)", "gemini-2.0-flash-lite-8k": "Google's Gemini 2.0 Flash Lite Generative AI model (8k token context)", + "gemini-2.5-flash-8k": + "Google's Gemini 2.5 Flash Generative AI model (8k token context)", + "gemini-2.5-pro-8k": + "Google's Gemini 2.5 Pro Generative AI model (8k token context)", "mistral-small-latest": "Fast, simple queries, short answers, less capabilities. 
(Mistral AI, 4k token context)", "mistral-medium-latest": @@ -815,10 +837,16 @@ export const LLM_DESCR: LLM2String = { "Fastest model, lightweight actions (Anthropic, 8k token context)", "claude-3-5-sonnet": "Our most intelligent model (Anthropic, 200k token context)", + "claude-3-sonnet": + "Our most intelligent model (Anthropic, 200k token context)", "claude-3-5-sonnet-4k": "Our most intelligent model (Anthropic, 4k token context)", - "claude-3-sonnet": - "Best combination of performance and speed (Anthropic, 200k token context)", + "claude-3-5-haiku-8k": + "Fastest model, lightweight actions (Anthropic, 8k token context)", + "claude-4-sonnet-8k": + "Best combination of performance and speed (Anthropic, 8k token context)", + "claude-4-opus-8k": + "Excels at writing and complex tasks (Anthropic, 8k token context)", "claude-3-sonnet-4k": "Best combination of performance and speed (Anthropic, 4k token context)", "claude-3-opus": @@ -984,25 +1012,25 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = { }, o1: { prompt_tokens: usd1Mtokens(15), - completion_tokens: usd1Mtokens(7.5), - max_tokens: 8192, // like gpt-4-turbo-8k - free: false, - }, - "o1-mini": { - prompt_tokens: usd1Mtokens(3), - completion_tokens: usd1Mtokens(1.5), + completion_tokens: usd1Mtokens(60), max_tokens: 8192, // like gpt-4-turbo-8k free: false, }, "o1-8k": { prompt_tokens: usd1Mtokens(15), - completion_tokens: usd1Mtokens(7.5), + completion_tokens: usd1Mtokens(60), max_tokens: 8192, // like gpt-4-turbo-8k free: false, }, "o1-mini-8k": { - prompt_tokens: usd1Mtokens(3), - completion_tokens: usd1Mtokens(1.5), + prompt_tokens: usd1Mtokens(1.1), + completion_tokens: usd1Mtokens(4.4), + max_tokens: 8192, // like gpt-4-turbo-8k + free: true, + }, + "o1-mini": { + prompt_tokens: usd1Mtokens(1.1), + completion_tokens: usd1Mtokens(4.4), max_tokens: 8192, // like gpt-4-turbo-8k free: false, }, @@ -1038,6 +1066,12 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = { max_tokens: 30720, free: true, }, + "gemini-1.5-flash": { + prompt_tokens: usd1Mtokens(0.075), + completion_tokens: usd1Mtokens(0.3), + max_tokens: 8_000, + free: true, + }, "gemini-1.5-flash-8k": { prompt_tokens: usd1Mtokens(0.075), completion_tokens: usd1Mtokens(0.3), @@ -1057,6 +1091,18 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = { max_tokens: 8_000, free: true, }, + "gemini-2.5-flash-8k": { + prompt_tokens: usd1Mtokens(0.3), + completion_tokens: usd1Mtokens(2.5), + max_tokens: 8_000, + free: true, + }, + "gemini-2.5-pro-8k": { + prompt_tokens: usd1Mtokens(1.25), + completion_tokens: usd1Mtokens(10), + max_tokens: 8_000, + free: false, + }, // https://mistral.ai/technology/ "mistral-small-latest": { prompt_tokens: usd1Mtokens(0.2), @@ -1105,7 +1151,7 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = { prompt_tokens: usd1Mtokens(3), completion_tokens: usd1Mtokens(15), max_tokens: 4_000, // limited to 4k tokens, offered for free - free: true, + free: false, }, "claude-3-sonnet": { prompt_tokens: usd1Mtokens(3), @@ -1122,7 +1168,25 @@ export const LLM_COST: { [name in LanguageModelCore]: Cost } = { "claude-3-haiku": { prompt_tokens: usd1Mtokens(0.8), completion_tokens: usd1Mtokens(4), - max_tokens: 200_000, + max_tokens: 8_000, // limited to 8k tokens, offered for free + free: true, + }, + "claude-3-5-haiku-8k": { + prompt_tokens: usd1Mtokens(0.8), + completion_tokens: usd1Mtokens(4), + max_tokens: 8_000, + free: true, + }, + "claude-4-sonnet-8k": { + prompt_tokens: usd1Mtokens(3), + 
completion_tokens: usd1Mtokens(15), + max_tokens: 8_000, + free: false, + }, + "claude-4-opus-8k": { + prompt_tokens: usd1Mtokens(15), + completion_tokens: usd1Mtokens(75), + max_tokens: 8_000, free: false, }, } as const; diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts index becc3377535..a8c456ee016 100644 --- a/src/packages/util/db-schema/purchase-quotas.ts +++ b/src/packages/util/db-schema/purchase-quotas.ts @@ -37,9 +37,14 @@ export function isPaygService(service: Service): boolean { return IS_PAYG[category ?? ""] ?? false; } +const GOOGLE_AI_COLOR = "#ff4d4f"; +const ANTHROPIC_COLOR = "#181818"; +const OPENAI_COLOR = "#10a37f"; +const MISTRALAI_COLOR = "#ff7000"; + const GPT_TURBO_128k: Spec = { display: "OpenAI GPT-4 Turbo 128k", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", } as const; @@ -50,7 +55,7 @@ const GPT_TURBO_8K: Spec = { const GPT_OMNI_128k: Spec = { display: "OpenAI GPT-4o 128k", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", } as const; @@ -71,7 +76,7 @@ const GPT_OMNI_MINI_8K: Spec = { const GPT_41_8K: Spec = { display: "OpenAI GPT-4.1", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", } as const; @@ -90,8 +95,6 @@ const GPT_O1_MINI_8K: Spec = { display: "OpenAI o1 mini", } as const; -const GOOGLE_AI_COLOR = "#ff4d4f"; - // NOTE: all-quotas-config.tsx will automatically filter out those, which are free or not selectable by the user export const QUOTA_SPEC: QuotaSpec = { credit: { @@ -159,26 +162,30 @@ export const QUOTA_SPEC: QuotaSpec = { description: "Charge for purchasing a voucher.", }, // ATTN: LLMs comes below this line, the quotas above are the important ones to show first! - "openai-gpt-4": { display: "OpenAI GPT-4", color: "#10a37f", category: "ai" }, + "openai-gpt-4": { + display: "OpenAI GPT-4", + color: OPENAI_COLOR, + category: "ai", + }, "openai-gpt-3.5-turbo": { display: "OpenAI GPT-3.5", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", }, "openai-gpt-3.5-turbo-16k": { display: "OpenAI GPT-3.5 16k", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", }, "openai-text-embedding-ada-002": { display: "OpenAI Text Embedding Ada 002", - color: "#10a37f", + color: OPENAI_COLOR, noSet: true, // because this model is not user visible yet category: "ai", }, "openai-gpt-4-32k": { display: "OpenAI GPT-4 32k", - color: "#10a37f", + color: OPENAI_COLOR, category: "ai", }, "openai-gpt-4-turbo-preview": GPT_TURBO_128k, // the "preview" is over @@ -213,6 +220,11 @@ export const QUOTA_SPEC: QuotaSpec = { noSet: true, // deprecated, will be removed category: "ai", }, + "google-gemini-1.5-flash": { + display: "Google Gemini 1.5 Flash", + color: GOOGLE_AI_COLOR, + category: "ai", + }, "google-gemini-1.5-flash-8k": { display: "Google Gemini 1.5 Flash", color: GOOGLE_AI_COLOR, @@ -248,59 +260,84 @@ export const QUOTA_SPEC: QuotaSpec = { color: GOOGLE_AI_COLOR, category: "ai", }, + "google-gemini-2.5-flash-8k": { + display: LLM_USERNAMES["gemini-2.5-flash-8k"], + color: GOOGLE_AI_COLOR, + category: "ai", + }, + "google-gemini-2.5-pro-8k": { + display: LLM_USERNAMES["gemini-2.5-pro-8k"], + color: GOOGLE_AI_COLOR, + category: "ai", + }, "anthropic-claude-3-opus": { display: LLM_USERNAMES["claude-3-opus"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-opus-8k": { display: LLM_USERNAMES["claude-3-opus-8k"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-sonnet": { display: 
LLM_USERNAMES["claude-3-sonnet"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-sonnet-4k": { display: LLM_USERNAMES["claude-3-sonnet-4k"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-5-sonnet": { display: LLM_USERNAMES["claude-3-5-sonnet"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-5-sonnet-4k": { display: LLM_USERNAMES["claude-3-5-sonnet-4k"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-haiku": { display: LLM_USERNAMES["claude-3-haiku"], - color: "#181818", + color: ANTHROPIC_COLOR, category: "ai", }, "anthropic-claude-3-haiku-8k": { display: LLM_USERNAMES["claude-3-haiku-8k"], - color: "#181818", + color: ANTHROPIC_COLOR, + category: "ai", + }, + "anthropic-claude-3-5-haiku-8k": { + display: LLM_USERNAMES["claude-3-5-haiku-8k"], + color: ANTHROPIC_COLOR, + category: "ai", + }, + "anthropic-claude-4-sonnet-8k": { + display: LLM_USERNAMES["claude-4-sonnet-8k"], + color: ANTHROPIC_COLOR, + category: "ai", + }, + "anthropic-claude-4-opus-8k": { + display: LLM_USERNAMES["claude-4-opus-8k"], + color: ANTHROPIC_COLOR, category: "ai", }, "mistralai-mistral-small-latest": { display: LLM_USERNAMES["mistral-small-latest"], - color: "#ff7000", // the orange from their website + color: MISTRALAI_COLOR, // the orange from their website category: "ai", }, "mistralai-mistral-medium-latest": { display: LLM_USERNAMES["mistral-medium-latest"], - color: "#ff7000", // the orange from their website + color: MISTRALAI_COLOR, // the orange from their website category: "ai", }, "mistralai-mistral-large-latest": { display: LLM_USERNAMES["mistral-large-latest"], - color: "#ff7000", // the orange from their website + color: MISTRALAI_COLOR, // the orange from their website category: "ai", }, "project-upgrade": { diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index f6c01efe932..09db8b45bac 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -339,7 +339,7 @@ export const EXTRAS: SettingsExtras = { // This is very similar to the ollama config, but there are small differences in the details. custom_openai_configuration: { name: "Custom OpenAI Endpoints", - desc: 'Configure OpenAI endpoints, queried via [@langchain/openai (Node.js)](https://js.langchain.com/v0.1/docs/integrations/llms/openai/). e.g. `{"myllm" : {"baseUrl": "http://1.2.3.4:5678/" , apiKey: "key...", cocalc: {display: "My LLM", desc: "My custom LLM", icon: "https://.../...png"}}, "gpt-4o-high": {baseUrl: "https://api.openai.com/v1", temperature: 1.5, "openAIApiKey": "sk-...", "model": "gpt-4o", cocalc: {display: "High GPT-4 Omni", desc: "GPT 4 Omni High Temp"}}}`', + desc: 'Configure OpenAI endpoints, queried via [@langchain/openai (Node.js)](https://js.langchain.com/v0.1/docs/integrations/llms/openai/). e.g. `{"myllm" : {"baseUrl": "http://1.2.3.4:5678/" , apiKey: "key...", cocalc: {display: "My LLM", desc: "My custom LLM", icon: "https://.../...png"}}, "gpt-4o-high": {baseUrl: "https://api.openai.com/v1", temperature: 1.5, "apiKey": "sk-...", "model": "gpt-4o", cocalc: {display: "High GPT-4 Omni", desc: "GPT 4 Omni High Temp"}}}`', default: "{}", multiline: 5, show: custom_openai_enabled,
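
The same streaming pattern now appears three times above, in google-genai-client.ts, google-lc.ts and mistral.ts: merge the streamed chunks with concat() so the final AIMessageChunk still carries usage_metadata, then fall back to tokenizer-based counting when the provider reports nothing. The sketch below only illustrates the shape of that shared logic; collectWithUsage and countFallback are illustrative names, not the actual helpers in this PR or in the new evaluate-lc.ts.

import { AIMessageChunk } from "@langchain/core/messages";
import { concat } from "@langchain/core/utils/stream";

interface CollectedOutput {
  output: string;
  total_tokens: number;
  prompt_tokens: number;
  completion_tokens: number;
}

// Accumulate a LangChain chat stream and derive token counts, preferring the
// provider-reported usage_metadata and falling back to a local estimate.
async function collectWithUsage(
  chunks: AsyncIterable<AIMessageChunk>,
  stream: ((text: string | null) => void) | undefined,
  // fallback counter, e.g. based on the GPT-3 tokenizer like numTokens() above
  countFallback: (output: string) => {
    prompt_tokens: number;
    completion_tokens: number;
  },
): Promise<CollectedOutput> {
  let finalResult: AIMessageChunk | undefined;
  let output = "";
  for await (const chunk of chunks) {
    const { content } = chunk;
    if (typeof content === "string") {
      output += content;
      stream?.(content);
    }
    // keep merging chunks so the final one still carries usage_metadata
    finalResult = finalResult ? concat(finalResult, chunk) : chunk;
  }
  stream?.(null);

  const usage = finalResult?.usage_metadata;
  if (usage != null) {
    // provider counts include "thinking" tokens, so total_tokens may exceed the sum
    return {
      output,
      total_tokens: usage.total_tokens,
      prompt_tokens: usage.input_tokens,
      completion_tokens: usage.output_tokens,
    };
  }
  const { prompt_tokens, completion_tokens } = countFallback(output);
  return {
    output,
    total_tokens: prompt_tokens + completion_tokens,
    prompt_tokens,
    completion_tokens,
  };
}

When usage_metadata is present, total_tokens can exceed prompt_tokens + completion_tokens because reasoning tokens are counted separately, which is why checkAnswer in models.test.ts now asserts toBeGreaterThanOrEqual rather than strict equality.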
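
In the llm-utils.ts hunk above, ANTHROPIC_VERSION now maps each AnthropicModel to string | null, with null marking the claude-3-sonnet* and claude-3-opus* entries that Anthropic retired. Callers therefore have to guard against null before handing the id to the client; a minimal sketch under that assumption, with resolveAnthropicModelId as a hypothetical name rather than a function in this diff:

import {
  ANTHROPIC_VERSION,
  AnthropicModel,
} from "@cocalc/util/db-schema/llm-utils";

// hypothetical helper, not part of this PR
function resolveAnthropicModelId(model: AnthropicModel): string {
  const id = ANTHROPIC_VERSION[model];
  if (id == null) {
    // null now marks models Anthropic no longer serves
    throw new Error(`Anthropic model '${model}' is no longer supported`);
  }
  return id; // e.g. "claude-3-5-haiku-latest" or "claude-sonnet-4-0"
}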
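
The new LLM_COST entries reuse the existing usd1Mtokens() convention, which appears to convert a USD-per-million-tokens price into a per-token rate (its definition is outside this section, so treat that as an assumption). Under that assumption, a rough back-of-the-envelope estimate for the newly added gemini-2.5-flash-8k entry looks like this; the real charging logic lives elsewhere in @cocalc/util and is not part of this diff:

import { LLM_COST } from "@cocalc/util/db-schema/llm-utils";

// hypothetical usage: estimate the charge for 1,000 prompt and 500 completion tokens,
// assuming prompt_tokens/completion_tokens are per-token USD rates
const { prompt_tokens, completion_tokens } = LLM_COST["gemini-2.5-flash-8k"];
const estimatedUSD = 1_000 * prompt_tokens + 500 * completion_tokens;
// with the prices in this PR this is roughly 0.0003 + 0.00125, i.e. about 0.0016 USD
console.log(estimatedUSD);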