diff --git a/libs/langchain-azure-cosmosdb/.env.example b/libs/langchain-azure-cosmosdb/.env.example deleted file mode 100644 index c4003b2a2b8a..000000000000 --- a/libs/langchain-azure-cosmosdb/.env.example +++ /dev/null @@ -1,18 +0,0 @@ -# Azure CosmosDB for NoSQL connection string -AZURE_COSMOSDB_NOSQL_CONNECTION_STRING= - -# Azure CosmosDB for NoSQL endpoint (if you're using managed identity) -AZURE_COSMOSDB_NOSQL_ENDPOINT= - -# Azure CosmosDB for MongoDB vCore connection string -AZURE_COSMOSDB_MONGODB_CONNECTION_STRING= - -# If you're using Azure OpenAI API, you'll need to set these variables -AZURE_OPENAI_API_KEY= -AZURE_OPENAI_API_INSTANCE_NAME= -AZURE_OPENAI_API_DEPLOYMENT_NAME= -AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= -AZURE_OPENAI_API_VERSION= - -# Or you can use the OpenAI API directly -OPENAI_API_KEY= diff --git a/libs/langchain-azure-cosmosdb/.eslintrc.cjs b/libs/langchain-azure-cosmosdb/.eslintrc.cjs deleted file mode 100644 index 344f8a9d6cd9..000000000000 --- a/libs/langchain-azure-cosmosdb/.eslintrc.cjs +++ /dev/null @@ -1,66 +0,0 @@ -module.exports = { - extends: [ - "airbnb-base", - "eslint:recommended", - "prettier", - "plugin:@typescript-eslint/recommended", - ], - parserOptions: { - ecmaVersion: 12, - parser: "@typescript-eslint/parser", - project: "./tsconfig.json", - sourceType: "module", - }, - plugins: ["@typescript-eslint", "no-instanceof"], - ignorePatterns: [ - ".eslintrc.cjs", - "scripts", - "node_modules", - "dist", - "dist-cjs", - "*.js", - "*.cjs", - "*.d.ts", - ], - rules: { - "no-process-env": 2, - "no-instanceof/no-instanceof": 2, - "@typescript-eslint/explicit-module-boundary-types": 0, - "@typescript-eslint/no-empty-function": 0, - "@typescript-eslint/no-shadow": 0, - "@typescript-eslint/no-empty-interface": 0, - "@typescript-eslint/no-use-before-define": ["error", "nofunc"], - "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], - "@typescript-eslint/no-floating-promises": "error", - "@typescript-eslint/no-misused-promises": "error", - camelcase: 0, - "class-methods-use-this": 0, - "import/extensions": [2, "ignorePackages"], - "import/no-extraneous-dependencies": [ - "error", - { devDependencies: ["**/*.test.ts"] }, - ], - "import/no-unresolved": 0, - "import/prefer-default-export": 0, - "keyword-spacing": "error", - "max-classes-per-file": 0, - "max-len": 0, - "no-await-in-loop": 0, - "no-bitwise": 0, - "no-console": 0, - "no-restricted-syntax": 0, - "no-shadow": 0, - "no-continue": 0, - "no-void": 0, - "no-underscore-dangle": 0, - "no-use-before-define": 0, - "no-useless-constructor": 0, - "no-return-await": 0, - "consistent-return": 0, - "no-else-return": 0, - "func-names": 0, - "no-lonely-if": 0, - "prefer-rest-params": 0, - "new-cap": ["error", { properties: false, capIsNew: false }], - }, -}; diff --git a/libs/langchain-azure-cosmosdb/.gitignore b/libs/langchain-azure-cosmosdb/.gitignore deleted file mode 100644 index c10034e2f1be..000000000000 --- a/libs/langchain-azure-cosmosdb/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -index.cjs -index.js -index.d.ts -index.d.cts -node_modules -dist -.yarn diff --git a/libs/langchain-azure-cosmosdb/.prettierrc b/libs/langchain-azure-cosmosdb/.prettierrc deleted file mode 100644 index ba08ff04f677..000000000000 --- a/libs/langchain-azure-cosmosdb/.prettierrc +++ /dev/null @@ -1,19 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/prettierrc", - "printWidth": 80, - "tabWidth": 2, - "useTabs": false, - "semi": true, - "singleQuote": false, - "quoteProps": "as-needed", - "jsxSingleQuote": false, - "trailingComma": "es5", - "bracketSpacing": true, - "arrowParens": "always", - "requirePragma": false, - "insertPragma": false, - "proseWrap": "preserve", - "htmlWhitespaceSensitivity": "css", - "vueIndentScriptAndStyle": false, - "endOfLine": "lf" -} diff --git a/libs/langchain-azure-cosmosdb/.release-it.json b/libs/langchain-azure-cosmosdb/.release-it.json deleted file mode 100644 index 522ee6abf705..000000000000 --- a/libs/langchain-azure-cosmosdb/.release-it.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "github": { - "release": true, - "autoGenerate": true, - "tokenRef": "GITHUB_TOKEN_RELEASE" - }, - "npm": { - "versionArgs": ["--workspaces-update=false"] - } -} diff --git a/libs/langchain-azure-cosmosdb/CHANGELOG.md b/libs/langchain-azure-cosmosdb/CHANGELOG.md deleted file mode 100644 index 8f7ecebac772..000000000000 --- a/libs/langchain-azure-cosmosdb/CHANGELOG.md +++ /dev/null @@ -1,13 +0,0 @@ -# @langchain/azure-cosmosdb - -## 0.2.10 - -### Patch Changes - -- fd4691f: use `keyEncoder` instead of insecure cache key getter - -## 0.2.9 - -### Patch Changes - -- 57f1250: Fix missing init while retrieving context in AzureCosmsosDBNoSQLChatMessageHistory diff --git a/libs/langchain-azure-cosmosdb/LICENSE b/libs/langchain-azure-cosmosdb/LICENSE deleted file mode 100644 index 8cd8f501eb49..000000000000 --- a/libs/langchain-azure-cosmosdb/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2023 LangChain - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/libs/langchain-azure-cosmosdb/README.md b/libs/langchain-azure-cosmosdb/README.md index 1e21ecfdf8b8..931bd49ff1f4 100644 --- a/libs/langchain-azure-cosmosdb/README.md +++ b/libs/langchain-azure-cosmosdb/README.md @@ -1,59 +1,3 @@ -# @langchain/azure-cosmosdb +# @langchain/azure-cosmosdb -This package contains the [Azure CosmosDB](https://learn.microsoft.com/azure/cosmos-db/) vector store integrations. - -Learn more about how to use this package in the LangChain documentation: -- [Azure CosmosDB for NoSQL](https://js.langchain.com/docs/integrations/vector_stores/azure_cosmosdb_nosql) -- [Azure CosmosDB for MongoDB vCore](https://js.langchain.com/docs/integrations/vector_stores/azure_cosmosdb_mongodb) - -## Installation - -```bash npm2yarn -npm install @langchain/azure-cosmosdb @langchain/core -``` - -This package, along with the main LangChain package, depends on [`@langchain/core`](https://npmjs.com/package/@langchain/core/). -If you are using this package with other LangChain packages, you should make sure that all of the packages depend on the same instance of @langchain/core. -You can do so by adding appropriate fields to your project's `package.json` like this: - -```json -{ - "name": "your-project", - "version": "0.0.0", - "dependencies": { - "@langchain/core": "^0.3.0", - "@langchain/azure-cosmosdb": "^0.2.5" - }, - "resolutions": { - "@langchain/core": "0.3.0" - }, - "overrides": { - "@langchain/core": "0.3.0" - }, - "pnpm": { - "overrides": { - "@langchain/core": "0.3.0" - } - } -} -``` - -The field you need depends on the package manager you're using, but we recommend adding a field for the common `yarn`, `npm`, and `pnpm` to maximize compatibility. - -## Usage - -```typescript -import { AzureCosmosDBNoSQLVectorStore } from "@langchain/azure-cosmosdb"; - -const store = await AzureCosmosDBNoSQLVectorStore.fromDocuments( - ["Hello, World!"], - new OpenAIEmbeddings(), - { - databaseName: "langchain", - containerName: "documents", - } -); - -const resultDocuments = await store.similaritySearch("hello"); -console.log(resultDocuments[0].pageContent); -``` +This package has moved and its code is now available in the dedicated [langchain-azure-js repository](https://github.com/langchain-ai/langchain-azure-js/tree/migrate-packages/libs/langchain-azure-cosmosdb). diff --git a/libs/langchain-azure-cosmosdb/jest.config.cjs b/libs/langchain-azure-cosmosdb/jest.config.cjs deleted file mode 100644 index 994826496bc5..000000000000 --- a/libs/langchain-azure-cosmosdb/jest.config.cjs +++ /dev/null @@ -1,21 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: "ts-jest/presets/default-esm", - testEnvironment: "./jest.env.cjs", - modulePathIgnorePatterns: ["dist/", "docs/"], - moduleNameMapper: { - "^(\\.{1,2}/.*)\\.js$": "$1", - }, - transform: { - "^.+\\.tsx?$": ["@swc/jest"], - }, - transformIgnorePatterns: [ - "/node_modules/", - "\\.pnp\\.[^\\/]+$", - "./scripts/jest-setup-after-env.js", - ], - setupFiles: ["dotenv/config"], - testTimeout: 20_000, - passWithNoTests: true, - collectCoverageFrom: ["src/**/*.ts"], -}; diff --git a/libs/langchain-azure-cosmosdb/jest.env.cjs b/libs/langchain-azure-cosmosdb/jest.env.cjs deleted file mode 100644 index 2ccedccb8672..000000000000 --- a/libs/langchain-azure-cosmosdb/jest.env.cjs +++ /dev/null @@ -1,12 +0,0 @@ -const { TestEnvironment } = require("jest-environment-node"); - -class AdjustedTestEnvironmentToSupportFloat32Array extends TestEnvironment { - constructor(config, context) { - // Make `instanceof Float32Array` return true in tests - // to avoid https://github.com/xenova/transformers.js/issues/57 and https://github.com/jestjs/jest/issues/2549 - super(config, context); - this.global.Float32Array = Float32Array; - } -} - -module.exports = AdjustedTestEnvironmentToSupportFloat32Array; diff --git a/libs/langchain-azure-cosmosdb/langchain.config.js b/libs/langchain-azure-cosmosdb/langchain.config.js deleted file mode 100644 index 46b1a2b31264..000000000000 --- a/libs/langchain-azure-cosmosdb/langchain.config.js +++ /dev/null @@ -1,22 +0,0 @@ -import { resolve, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -/** - * @param {string} relativePath - * @returns {string} - */ -function abs(relativePath) { - return resolve(dirname(fileURLToPath(import.meta.url)), relativePath); -} - -export const config = { - internals: [/node\:/, /@langchain\/core\//], - entrypoints: { - index: "index", - }, - requiresOptionalDependency: [], - tsConfigPath: resolve("./tsconfig.json"), - cjsSource: "./dist-cjs", - cjsDestination: "./dist", - abs, -}; diff --git a/libs/langchain-azure-cosmosdb/package.json b/libs/langchain-azure-cosmosdb/package.json deleted file mode 100644 index 069a8e0ec68e..000000000000 --- a/libs/langchain-azure-cosmosdb/package.json +++ /dev/null @@ -1,90 +0,0 @@ -{ - "name": "@langchain/azure-cosmosdb", - "version": "0.2.10", - "description": "Azure CosmosDB integration for LangChain.js", - "type": "module", - "engines": { - "node": ">=18" - }, - "main": "./index.js", - "types": "./index.d.ts", - "repository": { - "type": "git", - "url": "git@github.com:langchain-ai/langchainjs.git" - }, - "homepage": "https://github.com/langchain-ai/langchainjs/tree/main/libs/langchain-azure-cosmosdb/", - "scripts": { - "build": "yarn turbo:command build:internal --filter=@langchain/azure-cosmosdb", - "build:internal": "yarn lc_build --create-entrypoints --pre --tree-shaking", - "lint:eslint": "NODE_OPTIONS=--max-old-space-size=4096 eslint --cache --ext .ts,.js src/", - "lint:dpdm": "dpdm --skip-dynamic-imports circular --exit-code circular:1 --no-warning --no-tree src/*.ts src/**/*.ts", - "lint": "yarn lint:eslint && yarn lint:dpdm", - "lint:fix": "yarn lint:eslint --fix && yarn lint:dpdm", - "clean": "rm -rf dist/ .turbo", - "prepack": "yarn build", - "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%", - "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts", - "test:single": "NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000", - "test:int": "NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%", - "format": "prettier --config .prettierrc --write \"src\"", - "format:check": "prettier --config .prettierrc --check \"src\"" - }, - "author": "LangChain", - "license": "MIT", - "dependencies": { - "@azure/cosmos": "^4.2.0", - "@azure/identity": "^4.5.0", - "mongodb": "^6.17.0" - }, - "peerDependencies": { - "@langchain/core": ">=0.2.21 <0.4.0" - }, - "devDependencies": { - "@jest/globals": "^29.5.0", - "@langchain/core": "workspace:*", - "@langchain/openai": "workspace:^", - "@langchain/scripts": ">=0.1.0 <0.2.0", - "@swc/core": "^1.3.90", - "@swc/jest": "^0.2.29", - "@tsconfig/recommended": "^1.0.3", - "@typescript-eslint/eslint-plugin": "^6.12.0", - "@typescript-eslint/parser": "^6.12.0", - "dotenv": "^16.4.5", - "dpdm": "^3.14.0", - "eslint": "^8.33.0", - "eslint-config-airbnb-base": "^15.0.0", - "eslint-config-prettier": "^8.6.0", - "eslint-plugin-import": "^2.27.5", - "eslint-plugin-no-instanceof": "^1.0.1", - "eslint-plugin-prettier": "^4.2.1", - "jest": "^29.5.0", - "jest-environment-node": "^29.6.4", - "prettier": "^2.8.3", - "release-it": "^18.1.2", - "rollup": "^4.5.2", - "ts-jest": "^29.1.0", - "typescript": "~5.8.3" - }, - "publishConfig": { - "access": "public" - }, - "exports": { - ".": { - "types": { - "import": "./index.d.ts", - "require": "./index.d.cts", - "default": "./index.d.ts" - }, - "import": "./index.js", - "require": "./index.cjs" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist/", - "index.cjs", - "index.js", - "index.d.ts", - "index.d.cts" - ] -} diff --git a/libs/langchain-azure-cosmosdb/scripts/jest-setup-after-env.js b/libs/langchain-azure-cosmosdb/scripts/jest-setup-after-env.js deleted file mode 100644 index 7323083d0ea5..000000000000 --- a/libs/langchain-azure-cosmosdb/scripts/jest-setup-after-env.js +++ /dev/null @@ -1,9 +0,0 @@ -import { awaitAllCallbacks } from "@langchain/core/callbacks/promises"; -import { afterAll, jest } from "@jest/globals"; - -afterAll(awaitAllCallbacks); - -// Allow console.log to be disabled in tests -if (process.env.DISABLE_CONSOLE_LOGS === "true") { - console.log = jest.fn(); -} diff --git a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts b/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts deleted file mode 100644 index b21fc4b85c79..000000000000 --- a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts +++ /dev/null @@ -1,547 +0,0 @@ -import { - ObjectId, - Collection, - Document as MongoDBDocument, - MongoClient, - Db, - Filter, -} from "mongodb"; -import type { EmbeddingsInterface } from "@langchain/core/embeddings"; -import { - MaxMarginalRelevanceSearchOptions, - VectorStore, -} from "@langchain/core/vectorstores"; -import { Document, DocumentInterface } from "@langchain/core/documents"; -import { maximalMarginalRelevance } from "@langchain/core/utils/math"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; - -/** Azure Cosmos DB for MongoDB vCore Similarity type. */ -export const AzureCosmosDBMongoDBSimilarityType = { - /** Cosine similarity */ - COS: "COS", - /** Inner - product */ - IP: "IP", - /** Euclidian distance */ - L2: "L2", -} as const; - -/** Azure Cosmos DB for MongoDB vCore Similarity type. */ -export type AzureCosmosDBMongoDBSimilarityType = - (typeof AzureCosmosDBMongoDBSimilarityType)[keyof typeof AzureCosmosDBMongoDBSimilarityType]; - -/** Azure Cosmos DB for MongoDB vCore Index Options. */ -export type AzureCosmosDBMongoDBIndexOptions = { - /** Skips automatic index creation. */ - readonly skipCreate?: boolean; - - readonly indexType?: "ivf" | "hnsw" | "diskann"; - /** Number of clusters that the inverted file (IVF) index uses to group the vector data. */ - readonly numLists?: number; - /** Number of dimensions for vector similarity. */ - readonly dimensions?: number; - /** Similarity metric to use with the IVF index. */ - readonly similarity?: AzureCosmosDBMongoDBSimilarityType; - /** The max number of connections per layer with the HNSW index. */ - readonly m?: number; - /** The size of the dynamic candidate list for constructing the graph with the HNSW index. */ - readonly efConstruction?: number; - /** Max number of neighbors withe the Diskann idnex */ - readonly maxDegree?: number; - /** L value for index building withe the Diskann idnex */ - readonly lBuild?: number; - /** L value for index searching withe the Diskann idnex */ - readonly lSearch?: number; -}; - -/** Azure Cosmos DB for MongoDB vCore delete Parameters. */ -export type AzureCosmosDBMongoDBDeleteParams = { - /** List of IDs for the documents to be removed. */ - readonly ids?: string | string[]; - /** MongoDB filter object or list of IDs for the documents to be removed. */ - readonly filter?: Filter; -}; - -/** Configuration options for the `AzureCosmosDBMongoDBVectorStore` constructor. */ -export interface AzureCosmosDBMongoDBConfig { - readonly client?: MongoClient; - readonly connectionString?: string; - readonly databaseName?: string; - readonly collectionName?: string; - readonly indexName?: string; - readonly textKey?: string; - readonly embeddingKey?: string; - readonly indexOptions?: AzureCosmosDBMongoDBIndexOptions; -} - -/** - * Azure Cosmos DB for MongoDB vCore vector store. - * To use this, you should have both: - * - the `mongodb` NPM package installed - * - a connection string associated with a MongoDB VCore Cluster - * - * You do not need to create a database or collection, it will be created - * automatically. - * - * You also need an index on the collection, which is by default be created - * automatically using the `createIndex` method. - */ -export class AzureCosmosDBMongoDBVectorStore extends VectorStore { - get lc_secrets(): { [key: string]: string } { - return { - connectionString: "AZURE_COSMOSDB_MONGODB_CONNECTION_STRING", - }; - } - - private connectPromise: Promise; - - private initPromise?: Promise; - - private readonly client: MongoClient | undefined; - - private database: Db; - - private collection: Collection; - - readonly indexName: string; - - readonly textKey: string; - - readonly embeddingKey: string; - - private readonly indexOptions: AzureCosmosDBMongoDBIndexOptions; - - /** - * Initializes the AzureCosmosDBMongoDBVectorStore. - * Connect the client to the database and create the container, creating them if needed. - * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized. - */ - initialize: () => Promise; - - _vectorstoreType(): string { - return "azure_cosmosdb_mongodb"; - } - - constructor( - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBMongoDBConfig - ) { - super(embeddings, dbConfig); - - const connectionString = - dbConfig.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_MONGODB_CONNECTION_STRING"); - - if (!dbConfig.client && !connectionString) { - throw new Error( - "AzureCosmosDBMongoDBVectorStore client or connection string must be set." - ); - } - - if (!dbConfig.client) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - this.client = new MongoClient(connectionString!, { - appName: "langchainjs", - }); - } - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const client = dbConfig.client || this.client!; - const databaseName = dbConfig.databaseName ?? "documentsDB"; - const collectionName = dbConfig.collectionName ?? "documents"; - this.indexName = dbConfig.indexName ?? "vectorSearchIndex"; - this.textKey = dbConfig.textKey ?? "textContent"; - this.embeddingKey = dbConfig.embeddingKey ?? "vectorContent"; - this.indexOptions = dbConfig.indexOptions ?? {}; - - // Deferring initialization to the first call to `initialize` - this.initialize = () => { - if (this.initPromise === undefined) { - this.initPromise = this.init( - client, - databaseName, - collectionName - ).catch((error) => { - console.error( - "Error during AzureCosmosDBMongoDBVectorStore initialization:", - error - ); - }); - } - - return this.initPromise; - }; - } - - /** - * Checks if the specified index name during instance construction exists - * on the collection. - * @returns A promise that resolves to a boolean indicating if the index exists. - */ - async checkIndexExists(): Promise { - await this.initialize(); - const indexes = await this.collection.listIndexes().toArray(); - return indexes.some((index) => index.name === this.indexName); - } - - /** - * Deletes the index specified during instance construction if it exists. - * @returns A promise that resolves when the index has been deleted. - */ - async deleteIndex(): Promise { - await this.initialize(); - if (await this.checkIndexExists()) { - await this.collection.dropIndex(this.indexName); - } - } - - /** - * Creates an index on the collection with the specified index name during - * instance construction. - * - * Setting the numLists parameter correctly is important for achieving good - * accuracy and performance. - * Since the vector store uses IVF as the indexing strategy, you should - * create the index only after you have loaded a large enough sample - * documents to ensure that the centroids for the respective buckets are - * faily distributed. - * - * We recommend that numLists is set to documentCount/1000 for up to - * 1 million documents and to sqrt(documentCount) for more than 1 million - * documents. - * As the number of items in your database grows, you should tune numLists - * to be larger in order to achieve good latency performance for vector - * search. - * - * If you're experimenting with a new scenario or creating a small demo, - * you can start with numLists set to 1 to perform a brute-force search - * across all vectors. - * This should provide you with the most accurate results from the vector - * search, however be aware that the search speed and latency will be slow. - * After your initial setup, you should go ahead and tune the numLists - * parameter using the above guidance. - * @param numLists This integer is the number of clusters that the inverted - * file (IVF) index uses to group the vector data. - * We recommend that numLists is set to documentCount/1000 for up to - * 1 million documents and to sqrt(documentCount) for more than 1 million - * documents. - * Using a numLists value of 1 is akin to performing brute-force search, - * which has limited performance - * @param indexType Index Type for Mongo vCore index. - * @param dimensions Number of dimensions for vector similarity. - * The maximum number of supported dimensions is 2000. - * If no number is provided, it will be determined automatically by - * embedding a short text. - * @param similarity Similarity metric to use with the IVF index. - * Possible options are: - * - CosmosDBSimilarityType.COS (cosine distance) - * - CosmosDBSimilarityType.L2 (Euclidean distance) - * - CosmosDBSimilarityType.IP (inner product) - * @returns A promise that resolves when the index has been created. - */ - async createIndex( - dimensions: number | undefined = undefined, - indexType: "ivf" | "hnsw" | "diskann" = "ivf", - similarity: AzureCosmosDBMongoDBSimilarityType = AzureCosmosDBMongoDBSimilarityType.COS - ): Promise { - await this.connectPromise; - - let vectorLength = dimensions; - - if (vectorLength === undefined) { - const queryEmbedding = await this.embeddings.embedQuery("test"); - vectorLength = queryEmbedding.length; - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const cosmosSearchOptions: any = { - kind: "", - similarity, - dimensions: vectorLength, - }; - - if (indexType === "hnsw") { - cosmosSearchOptions.kind = "vector-hnsw"; - cosmosSearchOptions.m = this.indexOptions.m ?? 16; - cosmosSearchOptions.efConstruction = - this.indexOptions.efConstruction ?? 200; - } else if (indexType === "diskann") { - cosmosSearchOptions.kind = "vector-diskann"; - cosmosSearchOptions.maxDegree = this.indexOptions.maxDegree ?? 40; - cosmosSearchOptions.lBuild = this.indexOptions.lBuild ?? 50; - cosmosSearchOptions.lSearch = this.indexOptions.lSearch ?? 40; - /** Default to IVF index */ - } else { - cosmosSearchOptions.kind = "vector-ivf"; - cosmosSearchOptions.numLists = this.indexOptions.numLists ?? 100; - } - - const createIndexCommands = { - createIndexes: this.collection.collectionName, - indexes: [ - { - name: this.indexName, - key: { [this.embeddingKey]: "cosmosSearch" }, - cosmosSearchOptions, - }, - ], - }; - - await this.database.command(createIndexCommands); - } - - /** - * Removes specified documents from the AzureCosmosDBMongoDBVectorStore. - * If no IDs or filter are specified, all documents will be removed. - * @param params Parameters for the delete operation. - * @returns A promise that resolves when the documents have been removed. - */ - async delete( - params: AzureCosmosDBMongoDBDeleteParams | string[] = {} - ): Promise { - await this.initialize(); - - let ids: string | string[] | undefined; - let filter: AzureCosmosDBMongoDBDeleteParams["filter"]; - if (Array.isArray(params)) { - ids = params; - } else { - ids = params.ids; - filter = params.filter; - } - const idsArray = Array.isArray(ids) ? ids : [ids]; - const deleteIds = ids && idsArray.length > 0 ? idsArray : undefined; - let deleteFilter = filter ?? {}; - - if (deleteIds) { - const objectIds = deleteIds.map((id) => new ObjectId(id)); - deleteFilter = { _id: { $in: objectIds }, ...deleteFilter }; - } - - await this.collection.deleteMany(deleteFilter); - } - - /** - * Closes any newly instanciated Azure Cosmos DB client. - * If the client was passed in the constructor, it will not be closed. - * @returns A promise that resolves when any newly instanciated Azure - * Cosmos DB client been closed. - */ - async close(): Promise { - if (this.client) { - await this.client.close(); - } - } - - /** - * Method for adding vectors to the AzureCosmosDBMongoDBVectorStore. - * @param vectors Vectors to be added. - * @param documents Corresponding documents to be added. - * @returns A promise that resolves to the added documents IDs. - */ - async addVectors( - vectors: number[][], - documents: DocumentInterface[] - ): Promise { - const docs = vectors.map((embedding, idx) => ({ - [this.textKey]: documents[idx].pageContent, - [this.embeddingKey]: embedding, - ...documents[idx].metadata, - })); - await this.initialize(); - const result = await this.collection.insertMany(docs); - return Object.values(result.insertedIds).map((id) => String(id)); - } - - /** - * Method for adding documents to the AzureCosmosDBMongoDBVectorStore. It first converts - * the documents to texts and then adds them as vectors. - * @param documents The documents to add. - * @returns A promise that resolves to the added documents IDs. - */ - async addDocuments(documents: DocumentInterface[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method that performs a similarity search on the vectors stored in the - * collection. It returns a list of documents and their corresponding - * similarity scores. - * @param queryVector Query vector for the similarity search. - * @param k=4 Number of nearest neighbors to return. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ - async similaritySearchVectorWithScore( - queryVector: number[], - k: number, - indexType?: "ivf" | "hnsw" | "diskann" - ): Promise<[Document, number][]> { - await this.initialize(); - - const pipeline = [ - { - $search: { - cosmosSearch: { - vector: queryVector, - path: this.embeddingKey, - k: k ?? 4, - ...(indexType === "diskann" - ? { lSearch: this.indexOptions.lSearch ?? 40 } - : {}), - }, - returnStoredSource: true, - }, - }, - { - $project: { - similarityScore: { $meta: "searchScore" }, - document: "$$ROOT", - }, - }, - ]; - const results = await this.collection - .aggregate(pipeline) - .map<[Document, number]>((result) => { - const { similarityScore: score, document } = result; - const text = document[this.textKey]; - return [new Document({ pageContent: text, metadata: document }), score]; - }); - - return results.toArray(); - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND - * diversity among selected documents. - * @param query Text to look up documents similar to. - * @param options.k Number of documents to return. - * @param options.fetchK=20 Number of documents to fetch before passing to - * the MMR algorithm. - * @param options.lambda=0.5 Number between 0 and 1 that determines the - * degree of diversity among the results, where 0 corresponds to maximum - * diversity and 1 to minimum diversity. - * @returns List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise; - - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions, - indexType: "ivf" | "hnsw" | "diskann" - ): Promise; - - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions, - indexType?: "ivf" | "hnsw" | "diskann" - ): Promise { - const { k, fetchK = 20, lambda = 0.5 } = options; - - const queryEmbedding = await this.embeddings.embedQuery(query); - const docs = await this.similaritySearchVectorWithScore( - queryEmbedding, - fetchK, - indexType - ); - const embeddingList = docs.map((doc) => doc[0].metadata[this.embeddingKey]); - - // Re-rank the results using MMR - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - lambda, - k - ); - - const mmrDocs = mmrIndexes.map((index) => docs[index][0]); - return mmrDocs; - } - - /** - * Initializes the AzureCosmosDBMongoDBVectorStore by connecting to the database. - * @param client The MongoClient to use for connecting to the database. - * @param databaseName The name of the database to use. - * @param collectionName The name of the collection to use. - * @returns A promise that resolves when the AzureCosmosDBMongoDBVectorStore has been initialized. - */ - private async init( - client: MongoClient, - databaseName: string, - collectionName: string - ): Promise { - this.connectPromise = (async () => { - await client.connect(); - this.database = client.db(databaseName); - this.collection = this.database.collection(collectionName); - })(); - - // Unless skipCreate is set, create the index - // This operation is no-op if the index already exists - if (!this.indexOptions.skipCreate) { - const indexType = this.indexOptions.indexType || "ivf"; - await this.createIndex( - this.indexOptions.dimensions, - indexType, - this.indexOptions.similarity - ); - } - } - - /** - * Static method to create an instance of AzureCosmosDBMongoDBVectorStore from a - * list of texts. It first converts the texts to vectors and then adds - * them to the collection. - * @param texts List of texts to be converted to vectors. - * @param metadatas Metadata for the texts. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Azure Cosmos DB for MongoDB vCore. - * @returns Promise that resolves to a new instance of AzureCosmosDBMongoDBVectorStore. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBMongoDBConfig - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return AzureCosmosDBMongoDBVectorStore.fromDocuments( - docs, - embeddings, - dbConfig - ); - } - - /** - * Static method to create an instance of AzureCosmosDBMongoDBVectorStore from a - * list of documents. It first converts the documents to vectors and then - * adds them to the collection. - * @param docs List of documents to be converted to vectors. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Azure Cosmos DB for MongoDB vCore. - * @returns Promise that resolves to a new instance of AzureCosmosDBMongoDBVectorStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBMongoDBConfig - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} diff --git a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts b/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts deleted file mode 100644 index 3e4acb259c77..000000000000 --- a/libs/langchain-azure-cosmosdb/src/azure_cosmosdb_nosql.ts +++ /dev/null @@ -1,539 +0,0 @@ -import type { EmbeddingsInterface } from "@langchain/core/embeddings"; -import { - MaxMarginalRelevanceSearchOptions, - VectorStore, -} from "@langchain/core/vectorstores"; -import { Document, DocumentInterface } from "@langchain/core/documents"; -import { maximalMarginalRelevance } from "@langchain/core/utils/math"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - Container, - ContainerRequest, - CosmosClient, - CosmosClientOptions, - DatabaseRequest, - IndexingPolicy, - SqlParameter, - SqlQuerySpec, - VectorEmbedding, - VectorEmbeddingPolicy, - VectorIndex, -} from "@azure/cosmos"; -import { DefaultAzureCredential, TokenCredential } from "@azure/identity"; - -/** Azure Cosmos DB for NoSQL query filter. */ -export type AzureCosmosDBNoSQLQueryFilter = string | SqlQuerySpec; - -/** Azure AI Search filter type. */ -export type AzureCosmosDBNoSQLFilterType = { - /** - * SQL filter clause to add to the vector search query. - * @example 'WHERE c.category = "cars" LIMIT 10 OFFSSET 0' - */ - filterClause?: AzureCosmosDBNoSQLQueryFilter; - /** Determines whether or not to include the embeddings in the search results. */ - includeEmbeddings?: boolean; -}; - -/** Azure Cosmos DB for NoSQL Delete Parameters. */ -export type AzureCosmosDBNoSqlDeleteParams = { - /** List of IDs for the documents to be removed. */ - readonly ids?: string | string[]; - /** SQL query to select the documents to be removed. */ - readonly filter?: AzureCosmosDBNoSQLQueryFilter; -}; - -/** Azure Cosmos DB for NoSQL database creation options. */ -export type AzureCosmosDBNoSqlCreateDatabaseOptions = Partial< - Omit ->; -/** Azure Cosmos DB for NoSQL container creation options. */ -export type AzureCosmosDBNoSqlCreateContainerOptions = Partial< - Omit ->; - -/** - * Initialization options for the Azure CosmosDB for NoSQL database and container. - * - * Note that if you provides multiple vector embeddings in the vectorEmbeddingPolicy, - * the first one will be used for creating documents and searching. - */ -export interface AzureCosmosDBNoSQLInitOptions { - readonly vectorEmbeddingPolicy?: VectorEmbeddingPolicy; - readonly indexingPolicy?: IndexingPolicy; - readonly createContainerOptions?: AzureCosmosDBNoSqlCreateContainerOptions; - readonly createDatabaseOptions?: AzureCosmosDBNoSqlCreateDatabaseOptions; -} - -/** - * Configuration options for the `AzureCosmosDBNoSQLVectorStore` constructor. - */ -export interface AzureCosmosDBNoSQLConfig - extends AzureCosmosDBNoSQLInitOptions { - readonly client?: CosmosClient; - readonly connectionString?: string; - readonly endpoint?: string; - readonly credentials?: TokenCredential; - readonly databaseName?: string; - readonly containerName?: string; - readonly textKey?: string; - readonly metadataKey?: string; -} - -const USER_AGENT_SUFFIX = "langchainjs-cdbnosql-vectorstore-javascript"; - -/** - * Azure Cosmos DB for NoSQL vCore vector store. - * To use this, you should have both: - * - the `@azure/cosmos` NPM package installed - * - a connection string associated with a NoSQL instance - * - * You do not need to create a database or container, it will be created - * automatically. - */ -export class AzureCosmosDBNoSQLVectorStore extends VectorStore { - declare FilterType: AzureCosmosDBNoSQLFilterType; - - get lc_secrets(): { [key: string]: string } { - return { - connectionString: "AZURE_COSMOSDB_NOSQL_CONNECTION_STRING", - }; - } - - private initPromise?: Promise; - - private readonly client: CosmosClient; - - private container: Container; - - private readonly textKey: string; - - private readonly metadataKey: string; - - private embeddingKey: string; - - /** - * Initializes the AzureCosmosDBNoSQLVectorStore. - * Connect the client to the database and create the container, creating them if needed. - * @returns A promise that resolves when the AzureCosmosDBNoSQLVectorStore has been initialized. - */ - initialize: () => Promise; - - _vectorstoreType(): string { - return "azure_cosmosdb_nosql"; - } - - constructor( - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBNoSQLConfig - ) { - super(embeddings, dbConfig); - - const connectionString = - dbConfig.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING"); - - const endpoint = - dbConfig.endpoint ?? - getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_ENDPOINT"); - - if (!dbConfig.client && !connectionString && !endpoint) { - throw new Error( - "AzureCosmosDBNoSQLVectorStore client, connection string or endpoint must be set." - ); - } - - if (!dbConfig.client) { - if (connectionString) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - let [endpoint, key] = connectionString!.split(";"); - [, endpoint] = endpoint.split("="); - [, key] = key.split("="); - - this.client = new CosmosClient({ - endpoint, - key, - userAgentSuffix: USER_AGENT_SUFFIX, - }); - } else { - // Use managed identity - this.client = new CosmosClient({ - endpoint, - aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(), - userAgentSuffix: USER_AGENT_SUFFIX, - } as CosmosClientOptions); - } - } - - const client = dbConfig.client || this.client; - const databaseName = dbConfig.databaseName ?? "vectorSearchDB"; - const containerName = dbConfig.containerName ?? "vectorSearchContainer"; - this.textKey = dbConfig.textKey ?? "text"; - this.metadataKey = dbConfig.metadataKey ?? "metadata"; - const vectorEmbeddingPolicy = dbConfig.vectorEmbeddingPolicy ?? { - vectorEmbeddings: [], - }; - const indexingPolicy = dbConfig.indexingPolicy ?? { - indexingMode: "consistent", - automatic: true, - includedPaths: [{ path: "/*" }], - excludedPaths: [{ path: "/_etag/?" }], - }; - - if (vectorEmbeddingPolicy.vectorEmbeddings.length === 0) { - vectorEmbeddingPolicy.vectorEmbeddings = [ - { - path: "/vector", - dataType: "float32", - distanceFunction: "cosine", - // Will be determined automatically during initialization - dimensions: 0, - } as VectorEmbedding, - ]; - } - - if (!indexingPolicy.vectorIndexes?.length) { - indexingPolicy.vectorIndexes = [ - { - path: "/vector", - type: "quantizedFlat", - } as VectorIndex, - ]; - } - - this.embeddingKey = vectorEmbeddingPolicy.vectorEmbeddings[0].path.slice(1); - if (!this.embeddingKey) { - throw new Error( - "AzureCosmosDBNoSQLVectorStore requires a valid vectorEmbeddings path" - ); - } - - // Deferring initialization to the first call to `initialize` - this.initialize = () => { - if (this.initPromise === undefined) { - this.initPromise = this.init(client, databaseName, containerName, { - vectorEmbeddingPolicy, - indexingPolicy, - createContainerOptions: dbConfig.createContainerOptions, - createDatabaseOptions: dbConfig.createDatabaseOptions, - }).catch((error) => { - console.error( - "Error during AzureCosmosDBNoSQLVectorStore initialization:", - error - ); - }); - } - - return this.initPromise; - }; - } - - /** - * Removes specified documents from the AzureCosmosDBNoSQLVectorStore. - * If no IDs or filter are specified, all documents will be removed. - * @param params Parameters for the delete operation. - * @returns A promise that resolves when the documents have been removed. - */ - async delete(params: AzureCosmosDBNoSqlDeleteParams = {}): Promise { - await this.initialize(); - - if (params.ids && params.filter) { - throw new Error( - `AzureCosmosDBNoSQLVectorStore delete requires either "ids" or "filter" to be set in the params object, not both` - ); - } - - let ids: string[]; - let query: AzureCosmosDBNoSQLQueryFilter | undefined = params.filter; - - // Delete all documents - if (!params.ids && !params.filter) { - query = "SELECT c.id FROM c"; - } - - if (query) { - const { resources } = await this.container.items.query(query).fetchAll(); - ids = resources.map((item) => item.id); - } else { - ids = (Array.isArray(params.ids) ? params.ids : [params.ids]) as string[]; - } - - if (ids.length === 0) { - return; - } - - await Promise.all(ids.map((id) => this.container.item(id).delete())); - } - - /** - * Method for adding vectors to the AzureCosmosDBNoSQLVectorStore. - * @param vectors Vectors to be added. - * @param documents Corresponding documents to be added. - * @returns A promise that resolves to the added documents IDs. - */ - async addVectors( - vectors: number[][], - documents: DocumentInterface[] - ): Promise { - await this.initialize(); - const docs = vectors.map((embedding, idx) => ({ - [this.textKey]: documents[idx].pageContent, - [this.embeddingKey]: embedding, - [this.metadataKey]: documents[idx].metadata, - ...(documents[idx].id ? { id: documents[idx].id } : {}), - })); - - const ids: string[] = []; - const results = await Promise.all( - docs.map((doc) => this.container.items.create(doc)) - ); - - for (const result of results) { - ids.push(result.resource?.id ?? "error: could not create item"); - } - - return ids; - } - - /** - * Method for adding documents to the AzureCosmosDBNoSQLVectorStore. It first converts - * the documents to texts and then adds them as vectors. - * @param documents The documents to add. - * @returns A promise that resolves to the added documents IDs. - */ - async addDocuments(documents: DocumentInterface[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Performs a similarity search on the vectors stored in the container. - * @param query Query text for the similarity search. - * @param k=4 Number of nearest neighbors to return. - * @param filter Optional filter options for the documents. - * @returns Promise that resolves to a list of documents. - */ - async similaritySearch( - query: string, - k = 4, - filter: this["FilterType"] | undefined = undefined - ): Promise { - const results = await this.similaritySearchWithScore(query, k, filter); - - return results.map((result) => result[0]); - } - - /** - * Performs a similarity search on the vectors stored in the container. - * @param queryVector Query vector for the similarity search. - * @param k=4 Number of nearest neighbors to return. - * @param filter Optional filter options for the documents. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ - async similaritySearchVectorWithScore( - queryVector: number[], - k = 4, - filter: this["FilterType"] | undefined = undefined - ): Promise<[Document, number][]> { - await this.initialize(); - - let filterClause = ""; - let filterClauseParams: SqlParameter[] = []; - if (filter?.filterClause) { - if (typeof filter.filterClause === "string") { - filterClause = `${filter.filterClause} `; - } else { - filterClause = `${filter.filterClause.query} `; - filterClauseParams = filter.filterClause.parameters ?? []; - } - } - - const embeddings = filter?.includeEmbeddings - ? `c[@embeddingKey] AS vector, ` - : ""; - const query = `SELECT TOP @k c.id, ${embeddings}c[@textKey] AS text, c[@metadataKey] AS metadata, VectorDistance(c[@embeddingKey], @vector) AS similarityScore FROM c ${filterClause}ORDER BY VectorDistance(c[@embeddingKey], @vector)`; - - const { resources: items } = await this.container.items - .query( - { - query, - parameters: [ - ...filterClauseParams, - { name: "@k", value: k }, - { name: "@textKey", value: this.textKey }, - { name: "@metadataKey", value: this.metadataKey }, - { name: "@embeddingKey", value: this.embeddingKey }, - { name: "@vector", value: queryVector }, - ], - }, - { maxItemCount: k } - ) - .fetchAll(); - - const docsAndScores = items.map( - (item) => - [ - new Document({ - id: item.id, - pageContent: item.text, - metadata: { - ...(item.metadata ?? {}), - ...(filter?.includeEmbeddings - ? { [this.embeddingKey]: item.vector } - : {}), - }, - }), - item.similarityScore, - ] as [Document, number] - ); - - return docsAndScores; - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND - * diversity among selected documents. - * @param query Text to look up documents similar to. - * @param options.k Number of documents to return. - * @param options.fetchK=20 Number of documents to fetch before passing to - * the MMR algorithm. - * @param options.lambda=0.5 Number between 0 and 1 that determines the - * degree of diversity among the results, where 0 corresponds to maximum - * diversity and 1 to minimum diversity. - * @returns List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const { k, fetchK = 20, lambda = 0.5 } = options; - const includeEmbeddingsFlag = options.filter?.includeEmbeddings || false; - - const queryEmbedding = await this.embeddings.embedQuery(query); - const docs = await this.similaritySearchVectorWithScore( - queryEmbedding, - fetchK, - { - ...options.filter, - includeEmbeddings: true, - } - ); - const embeddingList = docs.map((doc) => doc[0].metadata[this.embeddingKey]); - - // Re-rank the results using MMR - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - lambda, - k - ); - - return mmrIndexes.map((index) => { - const doc = docs[index][0]; - - // Remove embeddings if they were not requested originally - if (!includeEmbeddingsFlag) { - delete doc.metadata[this.embeddingKey]; - } - return doc; - }); - } - - /** - * Initializes the AzureCosmosDBNoSQLVectorStore by connecting to the database. - * @param client The CosmosClient to use for connecting to the database. - * @param databaseName The name of the database to use. - * @param containerName The name of the collection to use. - * @param initOptions Initialization options for the database and container. - * @returns A promise that resolves when the AzureCosmosDBNoSQLVectorStore has been initialized. - */ - private async init( - client: CosmosClient, - databaseName: string, - containerName: string, - initOptions: AzureCosmosDBNoSQLInitOptions - ): Promise { - // Determine vector dimensions if not provided - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const vectorEmbeddingPolicy = initOptions.vectorEmbeddingPolicy!; - const needDimensions = vectorEmbeddingPolicy.vectorEmbeddings.some( - (v) => !v.dimensions - ); - if (needDimensions) { - const queryEmbedding = await this.embeddings.embedQuery("test"); - for (const v of vectorEmbeddingPolicy.vectorEmbeddings) { - if (!v.dimensions) { - v.dimensions = queryEmbedding.length; - } - } - } - - const { database } = await client.databases.createIfNotExists({ - ...(initOptions?.createDatabaseOptions ?? {}), - id: databaseName, - }); - - const { container } = await database.containers.createIfNotExists({ - ...(initOptions?.createContainerOptions ?? {}), - indexingPolicy: initOptions?.indexingPolicy, - vectorEmbeddingPolicy, - id: containerName, - }); - this.container = container; - } - - /** - * Static method to create an instance of AzureCosmosDBNoSQLVectorStore from a - * list of texts. It first converts the texts to vectors and then adds - * them to the collection. - * @param texts List of texts to be converted to vectors. - * @param metadatas Metadata for the texts. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Azure Cosmos DB for NoSQL. - * @returns Promise that resolves to a new instance of AzureCosmosDBNoSQLVectorStore. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBNoSQLConfig - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return AzureCosmosDBNoSQLVectorStore.fromDocuments( - docs, - embeddings, - dbConfig - ); - } - - /** - * Static method to create an instance of AzureCosmosDBNoSQLVectorStore from a - * list of documents. It first converts the documents to vectors and then - * adds them to the collection. - * @param docs List of documents to be converted to vectors. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Azure Cosmos DB for NoSQL. - * @returns Promise that resolves to a new instance of AzureCosmosDBNoSQLVectorStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBNoSQLConfig - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} diff --git a/libs/langchain-azure-cosmosdb/src/caches/caches_mongodb.ts b/libs/langchain-azure-cosmosdb/src/caches/caches_mongodb.ts deleted file mode 100644 index f2c8e81b3af2..000000000000 --- a/libs/langchain-azure-cosmosdb/src/caches/caches_mongodb.ts +++ /dev/null @@ -1,177 +0,0 @@ -import { - BaseCache, - deserializeStoredGeneration, - serializeGeneration, -} from "@langchain/core/caches"; -import { Generation } from "@langchain/core/outputs"; -import { Document } from "@langchain/core/documents"; -import { EmbeddingsInterface } from "@langchain/core/embeddings"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { MongoClient } from "mongodb"; -import { - AzureCosmosDBMongoDBConfig, - AzureCosmosDBMongoDBVectorStore, - AzureCosmosDBMongoDBSimilarityType, -} from "../azure_cosmosdb_mongodb.js"; - -/** - * Represents a Semantic Cache that uses CosmosDB MongoDB backend as the underlying - * storage system. - * - * @example - * ```typescript - * const embeddings = new OpenAIEmbeddings(); - * const cache = new AzureCosmosDBMongoDBSemanticCache(embeddings, { - * client?: MongoClient - * }); - * const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - * - * // Invoke the model to perform an action - * const response = await model.invoke("Do something random!"); - * console.log(response); - * ``` - */ -export class AzureCosmosDBMongoDBSemanticCache extends BaseCache { - private embeddings: EmbeddingsInterface; - - private config: AzureCosmosDBMongoDBConfig; - - private similarityScoreThreshold: number; - - private cacheDict: { [key: string]: AzureCosmosDBMongoDBVectorStore } = {}; - - private readonly client: MongoClient | undefined; - - private vectorDistanceFunction: string; - - constructor( - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBMongoDBConfig, - similarityScoreThreshold: number = 0.6 - ) { - super(); - - const connectionString = - dbConfig.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_MONGODB_CONNECTION_STRING"); - - if (!dbConfig.client && !connectionString) { - throw new Error( - "AzureCosmosDBMongoDBSemanticCache client or connection string must be set." - ); - } - - if (!dbConfig.client) { - this.client = new MongoClient(connectionString!, { - appName: "langchainjs", - }); - } else { - this.client = dbConfig.client; - } - - this.config = { - ...dbConfig, - client: this.client, - collectionName: dbConfig.collectionName ?? "semanticCacheContainer", - }; - - this.similarityScoreThreshold = similarityScoreThreshold; - this.embeddings = embeddings; - this.vectorDistanceFunction = - dbConfig?.indexOptions?.similarity ?? - AzureCosmosDBMongoDBSimilarityType.COS; - } - - private getLlmCache(llmKey: string) { - const key = this.keyEncoder(llmKey); - if (!this.cacheDict[key]) { - this.cacheDict[key] = new AzureCosmosDBMongoDBVectorStore( - this.embeddings, - this.config - ); - } - return this.cacheDict[key]; - } - - /** - * Retrieves data from the cache. - * - * @param prompt The prompt for lookup. - * @param llmKey The LLM key used to construct the cache key. - * @returns An array of Generations if found, null otherwise. - */ - async lookup(prompt: string, llmKey: string): Promise { - const llmCache = this.getLlmCache(llmKey); - - const queryEmbedding = await this.embeddings.embedQuery(prompt); - const results = await llmCache.similaritySearchVectorWithScore( - queryEmbedding, - 1, - this.config.indexOptions?.indexType - ); - if (!results.length) return null; - - const generations = results - .flatMap(([document, score]) => { - const isSimilar = - (this.vectorDistanceFunction === - AzureCosmosDBMongoDBSimilarityType.L2 && - score <= this.similarityScoreThreshold) || - (this.vectorDistanceFunction !== - AzureCosmosDBMongoDBSimilarityType.L2 && - score >= this.similarityScoreThreshold); - - if (!isSimilar) return undefined; - - return document.metadata.return_value.map((gen: string) => - deserializeStoredGeneration(JSON.parse(gen)) - ); - }) - .filter((gen) => gen !== undefined); - - return generations.length > 0 ? generations : null; - } - - /** - * Updates the cache with new data. - * - * @param prompt The prompt for update. - * @param llmKey The LLM key used to construct the cache key. - * @param value The value to be stored in the cache. - */ - public async update( - prompt: string, - llmKey: string, - returnValue: Generation[] - ): Promise { - const serializedGenerations = returnValue.map((generation) => - JSON.stringify(serializeGeneration(generation)) - ); - - const llmCache = this.getLlmCache(llmKey); - - const metadata = { - llm_string: llmKey, - prompt, - return_value: serializedGenerations, - }; - - const doc = new Document({ - pageContent: prompt, - metadata, - }); - - await llmCache.addDocuments([doc]); - } - - /** - * deletes the semantic cache for a given llmKey - * @param llmKey - */ - public async clear(llmKey: string) { - const key = this.keyEncoder(llmKey); - if (this.cacheDict[key]) { - await this.cacheDict[key].delete(); - } - } -} diff --git a/libs/langchain-azure-cosmosdb/src/caches/caches_nosql.ts b/libs/langchain-azure-cosmosdb/src/caches/caches_nosql.ts deleted file mode 100644 index 2dd321cb37d7..000000000000 --- a/libs/langchain-azure-cosmosdb/src/caches/caches_nosql.ts +++ /dev/null @@ -1,190 +0,0 @@ -import { - BaseCache, - deserializeStoredGeneration, - serializeGeneration, -} from "@langchain/core/caches"; -import { Generation } from "@langchain/core/outputs"; -import { Document } from "@langchain/core/documents"; -import { EmbeddingsInterface } from "@langchain/core/embeddings"; -import { CosmosClient, CosmosClientOptions } from "@azure/cosmos"; -import { DefaultAzureCredential } from "@azure/identity"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - AzureCosmosDBNoSQLConfig, - AzureCosmosDBNoSQLVectorStore, -} from "../azure_cosmosdb_nosql.js"; - -const USER_AGENT_SUFFIX = "langchainjs-cdbnosql-semanticcache-javascript"; -const DEFAULT_CONTAINER_NAME = "semanticCacheContainer"; - -/** - * Represents a Semantic Cache that uses CosmosDB NoSQL backend as the underlying - * storage system. - * - * @example - * ```typescript - * const embeddings = new OpenAIEmbeddings(); - * const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, { - * databaseName: DATABASE_NAME, - * containerName: CONTAINER_NAME - * }); - * const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - * - * // Invoke the model to perform an action - * const response = await model.invoke("Do something random!"); - * console.log(response); - * ``` - */ -export class AzureCosmosDBNoSQLSemanticCache extends BaseCache { - private embeddings: EmbeddingsInterface; - - private config: AzureCosmosDBNoSQLConfig; - - private similarityScoreThreshold: number; - - private cacheDict: { [key: string]: AzureCosmosDBNoSQLVectorStore } = {}; - - private vectorDistanceFunction: string; - - constructor( - embeddings: EmbeddingsInterface, - dbConfig: AzureCosmosDBNoSQLConfig, - similarityScoreThreshold: number = 0.6 - ) { - super(); - let client: CosmosClient; - - const connectionString = - dbConfig.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING"); - - const endpoint = - dbConfig.endpoint ?? - getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_ENDPOINT"); - - if (!dbConfig.client && !connectionString && !endpoint) { - throw new Error( - "AzureCosmosDBNoSQLSemanticCache client, connection string or endpoint must be set." - ); - } - - if (!dbConfig.client) { - if (connectionString) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - let [endpoint, key] = connectionString!.split(";"); - [, endpoint] = endpoint.split("="); - [, key] = key.split("="); - - client = new CosmosClient({ - endpoint, - key, - userAgentSuffix: USER_AGENT_SUFFIX, - }); - } else { - // Use managed identity - client = new CosmosClient({ - endpoint, - aadCredentials: dbConfig.credentials ?? new DefaultAzureCredential(), - userAgentSuffix: USER_AGENT_SUFFIX, - } as CosmosClientOptions); - } - } else { - client = dbConfig.client; - } - - this.vectorDistanceFunction = - dbConfig.vectorEmbeddingPolicy?.vectorEmbeddings[0].distanceFunction ?? - "cosine"; - - this.config = { - ...dbConfig, - client, - databaseName: dbConfig.databaseName, - containerName: dbConfig.containerName ?? DEFAULT_CONTAINER_NAME, - }; - this.embeddings = embeddings; - this.similarityScoreThreshold = similarityScoreThreshold; - } - - private getLlmCache(llmKey: string) { - const key = this.keyEncoder(llmKey); - if (!this.cacheDict[key]) { - this.cacheDict[key] = new AzureCosmosDBNoSQLVectorStore( - this.embeddings, - this.config - ); - } - return this.cacheDict[key]; - } - - /** - * Retrieves data from the cache. - * - * @param prompt The prompt for lookup. - * @param llmKey The LLM key used to construct the cache key. - * @returns An array of Generations if found, null otherwise. - */ - public async lookup(prompt: string, llmKey: string) { - const llmCache = this.getLlmCache(llmKey); - - const results = await llmCache.similaritySearchWithScore(prompt, 1); - if (!results.length) return null; - - const generations = results - .flatMap(([document, score]) => { - const isSimilar = - (this.vectorDistanceFunction === "euclidean" && - score <= this.similarityScoreThreshold) || - (this.vectorDistanceFunction !== "euclidean" && - score >= this.similarityScoreThreshold); - - if (!isSimilar) return undefined; - - return document.metadata.return_value.map((gen: string) => - deserializeStoredGeneration(JSON.parse(gen)) - ); - }) - .filter((gen) => gen !== undefined); - - return generations.length > 0 ? generations : null; - } - - /** - * Updates the cache with new data. - * - * @param prompt The prompt for update. - * @param llmKey The LLM key used to construct the cache key. - * @param value The value to be stored in the cache. - */ - public async update( - prompt: string, - llmKey: string, - returnValue: Generation[] - ) { - const serializedGenerations = returnValue.map((generation) => - JSON.stringify(serializeGeneration(generation)) - ); - const llmCache = this.getLlmCache(llmKey); - const metadata = { - llm_string: llmKey, - prompt, - return_value: serializedGenerations, - }; - const doc = new Document({ - pageContent: prompt, - metadata, - }); - await llmCache.addDocuments([doc]); - } - - /** - * deletes the semantic cache for a given llmKey - * @param llmKey - */ - public async clear(llmKey: string) { - const key = this.keyEncoder(llmKey); - if (this.cacheDict[key]) { - await this.cacheDict[key].delete(); - } - } -} diff --git a/libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts b/libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts deleted file mode 100644 index 8a0ba0264160..000000000000 --- a/libs/langchain-azure-cosmosdb/src/chat_histories/mongodb.ts +++ /dev/null @@ -1,230 +0,0 @@ -import { - Collection, - Document as AzureCosmosMongoDBDocument, - PushOperator, - Db, - MongoClient, -} from "mongodb"; -import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; -import { - BaseMessage, - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "@langchain/core/messages"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; - -export interface AzureCosmosDBMongoChatHistoryDBConfig { - readonly client?: MongoClient; - readonly connectionString?: string; - readonly databaseName?: string; - readonly collectionName?: string; -} - -export type ChatSessionMongo = { - id: string; - context: Record; -}; - -const ID_KEY = "sessionId"; -const ID_USER = "userId"; - -export class AzureCosmosDBMongoChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "azurecosmosdb"]; - - get lc_secrets(): { [key: string]: string } { - return { - connectionString: "AZURE_COSMOSDB_MONGODB_CONNECTION_STRING", - }; - } - - private initPromise?: Promise; - - private context: Record = {}; - - private readonly client: MongoClient | undefined; - - private database: Db; - - private collection: Collection; - - private sessionId: string; - - private userId: string; - - initialize: () => Promise; - - constructor( - dbConfig: AzureCosmosDBMongoChatHistoryDBConfig, - sessionId: string, - userId: string - ) { - super(); - - const connectionString = - dbConfig.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_MONGODB_CONNECTION_STRING"); - - if (!dbConfig.client && !connectionString) { - throw new Error("Mongo client or connection string must be set."); - } - - if (!dbConfig.client) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - this.client = new MongoClient(connectionString!, { - appName: "langchainjs", - }); - } - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const client = dbConfig.client || this.client!; - const databaseName = dbConfig.databaseName ?? "chatHistoryDB"; - const collectionName = dbConfig.collectionName ?? "chatHistory"; - - this.sessionId = sessionId; - this.userId = userId ?? "anonymous"; - - // Deferring initialization to the first call to `initialize` - this.initialize = () => { - if (this.initPromise === undefined) { - this.initPromise = this.init( - client, - databaseName, - collectionName - ).catch((error) => { - console.error( - "Error during AzureCosmosDBMongoChatMessageHistory initialization: ", - error - ); - }); - } - - return this.initPromise; - }; - } - - /** - * Initializes the AzureCosmosDBMongoChatMessageHistory by connecting to the database. - * @param client The MongoClient to use for connecting to the database. - * @param databaseName The name of the database to use. - * @param collectionName The name of the collection to use. - * @returns A promise that resolves when the AzureCosmosDBMongoChatMessageHistory has been initialized. - */ - private async init( - client: MongoClient, - databaseName: string, - collectionName: string - ): Promise { - this.initPromise = (async () => { - await client.connect(); - this.database = client.db(databaseName); - this.collection = this.database.collection(collectionName); - })(); - - return this.initPromise; - } - - /** - * Retrieves the messages stored in the history. - * @returns A promise that resolves with the messages stored in the history. - */ - async getMessages(): Promise { - await this.initialize(); - - const document = await this.collection.findOne({ - [ID_KEY]: this.sessionId, - [ID_USER]: this.userId, - }); - const messages = document?.messages || []; - return mapStoredMessagesToChatMessages(messages); - } - - /** - * Adds a message to the history. - * @param message The message to add to the history. - * @returns A promise that resolves when the message has been added to the history. - */ - async addMessage(message: BaseMessage): Promise { - await this.initialize(); - - const messages = mapChatMessagesToStoredMessages([message]); - const context = await this.getContext(); - await this.collection.updateOne( - { [ID_KEY]: this.sessionId, [ID_USER]: this.userId }, - { - $push: { messages: { $each: messages } } as PushOperator, - $set: { context }, - }, - { upsert: true } - ); - } - - /** - * Clear the history. - * @returns A promise that resolves when the history has been cleared. - */ - async clear(): Promise { - await this.initialize(); - - await this.collection.deleteOne({ - [ID_KEY]: this.sessionId, - [ID_USER]: this.userId, - }); - } - - async getAllSessions(): Promise { - await this.initialize(); - const documents = await this.collection - .find({ - [ID_USER]: this.userId, - }) - .toArray(); - - const chatSessions: ChatSessionMongo[] = documents.map((doc) => ({ - id: doc[ID_KEY], - user_id: doc[ID_USER], - context: doc.context || {}, - })); - - return chatSessions; - } - - async clearAllSessions() { - await this.initialize(); - try { - await this.collection.deleteMany({ - [ID_USER]: this.userId, - }); - } catch (error) { - console.error("Error clearing chat history sessions:", error); - throw error; - } - } - - async getContext(): Promise> { - await this.initialize(); - - const document = await this.collection.findOne({ - [ID_KEY]: this.sessionId, - [ID_USER]: this.userId, - }); - this.context = document?.context || this.context; - return this.context; - } - - async setContext(context: Record): Promise { - await this.initialize(); - - try { - await this.collection.updateOne( - { [ID_KEY]: this.sessionId }, - { - $set: { context }, - }, - { upsert: true } - ); - } catch (error) { - console.error("Error setting chat history context", error); - throw error; - } - } -} diff --git a/libs/langchain-azure-cosmosdb/src/chat_histories/nosql.ts b/libs/langchain-azure-cosmosdb/src/chat_histories/nosql.ts deleted file mode 100644 index c74c46368b7f..000000000000 --- a/libs/langchain-azure-cosmosdb/src/chat_histories/nosql.ts +++ /dev/null @@ -1,257 +0,0 @@ -import { - Container, - CosmosClient, - CosmosClientOptions, - ErrorResponse, -} from "@azure/cosmos"; -import { DefaultAzureCredential, TokenCredential } from "@azure/identity"; -import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; -import { - BaseMessage, - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "@langchain/core/messages"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; - -const USER_AGENT_SUFFIX = "langchainjs-cdbnosql-chathistory-javascript"; -const DEFAULT_DATABASE_NAME = "chatHistoryDB"; -const DEFAULT_CONTAINER_NAME = "chatHistoryContainer"; - -/** - * Lightweight type for listing chat sessions. - */ -export type ChatSession = { - id: string; - context: Record; -}; - -/** - * Type for the input to the `AzureCosmosDBNoSQLChatMessageHistory` constructor. - */ -export interface AzureCosmosDBNoSQLChatMessageHistoryInput { - sessionId: string; - userId?: string; - client?: CosmosClient; - connectionString?: string; - endpoint?: string; - databaseName?: string; - containerName?: string; - credentials?: TokenCredential; - ttl?: number; -} - -/** - * Class for storing chat message history with Cosmos DB NoSQL. It extends the - * BaseListChatMessageHistory class and provides methods to get, add, and - * clear messages. - * - * @example - * ```typescript - * const model = new ChatOpenAI({ - * model: "gpt-3.5-turbo", - * temperature: 0, - * }); - * const prompt = ChatPromptTemplate.fromMessages([ - * [ - * "system", - * "You are a helpful assistant. Answer all questions to the best of your ability.", - * ], - * new MessagesPlaceholder("chat_history"), - * ["human", "{input}"], - * ]); - * - * const chain = prompt.pipe(model).pipe(new StringOutputParser()); - * const chainWithHistory = new RunnableWithMessageHistory({ - * runnable: chain, - * inputMessagesKey: "input", - * historyMessagesKey: "chat_history", - * getMessageHistory: async (sessionId) => { - * const chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory({ - * sessionId: sessionId, - * userId: "user-id", - * databaseName: "DATABASE_NAME", - * containerName: "CONTAINER_NAME", - * }) - * return chatHistory; - * }, - * }); - * await chainWithHistory.invoke( - * { input: "What did I just say my name was?" }, - * { configurable: { sessionId: "session-id" } } - * ); - * ``` - */ -export class AzureCosmsosDBNoSQLChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "azurecosmosdb"]; - - private container: Container; - - private sessionId: string; - - private databaseName: string; - - private containerName: string; - - private client: CosmosClient; - - private userId: string; - - private ttl: number | undefined; - - private messageList: BaseMessage[] = []; - - private initPromise?: Promise; - - private context: Record = {}; - - constructor(chatHistoryInput: AzureCosmosDBNoSQLChatMessageHistoryInput) { - super(); - - this.sessionId = chatHistoryInput.sessionId; - this.databaseName = chatHistoryInput.databaseName ?? DEFAULT_DATABASE_NAME; - this.containerName = - chatHistoryInput.containerName ?? DEFAULT_CONTAINER_NAME; - this.userId = chatHistoryInput.userId ?? "anonymous"; - this.ttl = chatHistoryInput.ttl; - this.client = this.initializeClient(chatHistoryInput); - } - - private initializeClient( - input: AzureCosmosDBNoSQLChatMessageHistoryInput - ): CosmosClient { - const connectionString = - input.connectionString ?? - getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_CONNECTION_STRING"); - const endpoint = - input.endpoint ?? getEnvironmentVariable("AZURE_COSMOSDB_NOSQL_ENDPOINT"); - - if (!input.client && !connectionString && !endpoint) { - throw new Error( - "CosmosClient, connection string, or endpoint must be provided." - ); - } - - if (input.client) { - return input.client; - } - - if (connectionString) { - const [endpointPart, keyPart] = connectionString.split(";"); - const endpoint = endpointPart.split("=")[1]; - const key = keyPart.split("=")[1]; - - return new CosmosClient({ - endpoint, - key, - userAgentSuffix: USER_AGENT_SUFFIX, - }); - } else { - return new CosmosClient({ - endpoint, - aadCredentials: input.credentials ?? new DefaultAzureCredential(), - userAgentSuffix: USER_AGENT_SUFFIX, - } as CosmosClientOptions); - } - } - - private async initializeContainer(): Promise { - if (!this.initPromise) { - this.initPromise = (async () => { - const { database } = await this.client.databases.createIfNotExists({ - id: this.databaseName, - }); - const { container } = await database.containers.createIfNotExists({ - id: this.containerName, - partitionKey: "/userId", - defaultTtl: this.ttl, - }); - this.container = container; - })().catch((error) => { - console.error("Error initializing Cosmos DB container:", error); - throw error; - }); - } - return this.initPromise; - } - - async getMessages(): Promise { - await this.initializeContainer(); - const document = await this.container - .item(this.sessionId, this.userId) - .read(); - const messages = document.resource?.messages || []; - this.messageList = mapStoredMessagesToChatMessages(messages); - return this.messageList; - } - - async addMessage(message: BaseMessage): Promise { - await this.initializeContainer(); - this.messageList = await this.getMessages(); - this.messageList.push(message); - const messages = mapChatMessagesToStoredMessages(this.messageList); - const context = await this.getContext(); - await this.container.items.upsert({ - id: this.sessionId, - userId: this.userId, - context, - messages, - }); - } - - async clear(): Promise { - this.messageList = []; - await this.initializeContainer(); - await this.container.item(this.sessionId, this.userId).delete(); - } - - async clearAllSessions() { - await this.initializeContainer(); - const query = { - query: "SELECT c.id FROM c WHERE c.userId = @userId", - parameters: [{ name: "@userId", value: this.userId }], - }; - const { resources: userSessions } = await this.container.items - .query(query) - .fetchAll(); - for (const userSession of userSessions) { - await this.container.item(userSession.id, this.userId).delete(); - } - } - - async getAllSessions(): Promise { - await this.initializeContainer(); - const query = { - query: "SELECT c.id, c.context FROM c WHERE c.userId = @userId", - parameters: [{ name: "@userId", value: this.userId }], - }; - const { resources: userSessions } = await this.container.items - .query(query) - .fetchAll(); - return userSessions ?? []; - } - - async getContext(): Promise> { - await this.initializeContainer(); - const document = await this.container - .item(this.sessionId, this.userId) - .read(); - this.context = document.resource?.context || this.context; - return this.context; - } - - async setContext(context: Record): Promise { - await this.initializeContainer(); - this.context = context || {}; - try { - await this.container - .item(this.sessionId, this.userId) - .patch([{ op: "replace", path: "/context", value: this.context }]); - } catch (_error: unknown) { - const error = _error as ErrorResponse; - // If document does not exist yet, context will be set when adding the first message - if (error?.code !== 404) { - throw error; - } - } - } -} diff --git a/libs/langchain-azure-cosmosdb/src/index.ts b/libs/langchain-azure-cosmosdb/src/index.ts deleted file mode 100644 index 2778d2c55417..000000000000 --- a/libs/langchain-azure-cosmosdb/src/index.ts +++ /dev/null @@ -1,6 +0,0 @@ -export * from "./azure_cosmosdb_mongodb.js"; -export * from "./azure_cosmosdb_nosql.js"; -export * from "./caches/caches_nosql.js"; -export * from "./caches/caches_mongodb.js"; -export * from "./chat_histories/nosql.js"; -export * from "./chat_histories/mongodb.js"; diff --git a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.int.test.ts deleted file mode 100644 index 13a401e4695c..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.int.test.ts +++ /dev/null @@ -1,266 +0,0 @@ -/* eslint-disable no-process-env */ - -import { test, expect } from "@jest/globals"; -import { MongoClient } from "mongodb"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; - -import { AzureCosmosDBMongoDBVectorStore } from "../azure_cosmosdb_mongodb.js"; - -const DATABASE_NAME = "langchain"; -const COLLECTION_NAME = "test"; -const INDEX_NAME = "vectorSearchIndex"; - -/* - * To run this test, you need have an Azure Cosmos DB for vCore instance - * running. You can deploy a free version on Azure Portal without any cost, - * following this guide: - * https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/quickstart-portal - * - * You do not need to create a database or collection, it will be created - * automatically by the test. - * - * Once you have the instance running, you need to set the following environment - * variables before running the test: - * - AZURE_COSMOSDB_MONGODB_CONNECTION_STRING - * - AZURE_OPENAI_API_KEY - * - AZURE_OPENAI_API_INSTANCE_NAME - * - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME - * - AZURE_OPENAI_API_VERSION - * - * A regular OpenAI key can also be used instead of Azure OpenAI. - */ -describe("AzureCosmosDBMongoDBVectorStore", () => { - beforeEach(async () => { - expect(process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING).toBeDefined(); - - // Note: when using Azure OpenAI, you have to also set these variables - // in addition to the API key: - // - AZURE_OPENAI_API_INSTANCE_NAME - // - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME - // - AZURE_OPENAI_API_VERSION - expect( - process.env.OPENAI_API_KEY || process.env.AZURE_OPENAI_API_KEY - ).toBeDefined(); - - const client = new MongoClient( - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING! - ); - await client.connect(); - const db = client.db(DATABASE_NAME); - const collection = await db.createCollection(COLLECTION_NAME); - - // Make sure the database is empty - await collection.deleteMany({}); - - // Delete any existing index - try { - await collection.dropIndex(INDEX_NAME); - } catch { - // Ignore error if the index does not exist - } - - await client.close(); - }); - - test("performs similarity search", async () => { - const vectorStore = new AzureCosmosDBMongoDBVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - indexName: INDEX_NAME, - indexOptions: { - numLists: 1, - }, - } - ); - - expect(vectorStore).toBeDefined(); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { pageContent: "Cats sleeps a lot.", metadata: { b: 1 } }, - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - - const results: Document[] = await vectorStore.similaritySearch( - "sandwich", - 1 - ); - - expect(results.length).toEqual(1); - expect(results).toMatchObject([ - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - ]); - - const retriever = vectorStore.asRetriever({}); - - const docs = await retriever.getRelevantDocuments("house"); - expect(docs).toBeDefined(); - expect(docs[0]).toMatchObject({ - pageContent: "The house is open", - metadata: { d: 1, e: 2 }, - }); - - await vectorStore.close(); - }); - - test("performs max marginal relevance search", async () => { - const texts = ["foo", "foo", "fox"]; - const vectorStore = await AzureCosmosDBMongoDBVectorStore.fromTexts( - texts, - {}, - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - indexName: INDEX_NAME, - indexOptions: { - numLists: 1, - }, - } - ); - - const output = await vectorStore.maxMarginalRelevanceSearch("foo", { - k: 10, - fetchK: 20, - lambda: 0.1, - }); - - expect(output).toHaveLength(texts.length); - - const actual = output.map((doc) => doc.pageContent); - const expected = ["foo", "fox", "foo"]; - expect(actual).toEqual(expected); - - const standardRetriever = await vectorStore.asRetriever(); - - const standardRetrieverOutput = - await standardRetriever.getRelevantDocuments("foo"); - expect(output).toHaveLength(texts.length); - - const standardRetrieverActual = standardRetrieverOutput.map( - (doc) => doc.pageContent - ); - const standardRetrieverExpected = ["foo", "foo", "fox"]; - expect(standardRetrieverActual).toEqual(standardRetrieverExpected); - - const retriever = await vectorStore.asRetriever({ - searchType: "mmr", - searchKwargs: { - fetchK: 20, - lambda: 0.1, - }, - }); - - const retrieverOutput = await retriever.getRelevantDocuments("foo"); - expect(output).toHaveLength(texts.length); - - const retrieverActual = retrieverOutput.map((doc) => doc.pageContent); - const retrieverExpected = ["foo", "fox", "foo"]; - expect(retrieverActual).toEqual(retrieverExpected); - - const similarity = await vectorStore.similaritySearchWithScore("foo", 1); - expect(similarity.length).toBe(1); - - await vectorStore.close(); - }); - - test("deletes documents by id", async () => { - const vectorStore = new AzureCosmosDBMongoDBVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - indexName: INDEX_NAME, - indexOptions: { - numLists: 1, - }, - } - ); - - const ids = await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { - pageContent: "The is the house of parliament", - metadata: { d: 1, e: 2 }, - }, - ]); - - // Delete document matching specified ids - await vectorStore.delete({ ids: ids.slice(0, 1) }); - - const results = await vectorStore.similaritySearch("politics", 10); - - expect(results.length).toEqual(1); - expect(results[0].pageContent).toEqual("The is the house of parliament"); - - await vectorStore.close(); - }); - - test("deletes documents by filter", async () => { - const vectorStore = new AzureCosmosDBMongoDBVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - indexName: INDEX_NAME, - indexOptions: { - numLists: 1, - }, - } - ); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { - pageContent: "The is the house of parliament", - metadata: { d: 1, e: 2 }, - }, - ]); - - // Delete document matching the filter - await vectorStore.delete({ filter: { a: 1 } }); - - const results = await vectorStore.similaritySearch("politics", 10); - - expect(results.length).toEqual(1); - expect(results[0].pageContent).toEqual("The is the house of parliament"); - - await vectorStore.close(); - }); - - test("deletes all documents", async () => { - const vectorStore = new AzureCosmosDBMongoDBVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - indexName: INDEX_NAME, - indexOptions: { - numLists: 1, - }, - } - ); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { - pageContent: "The is the house of parliament", - metadata: { d: 1, e: 2 }, - }, - ]); - - // Delete all documents - await vectorStore.delete(); - - const results = await vectorStore.similaritySearch("politics", 10); - - expect(results.length).toEqual(0); - - await vectorStore.close(); - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.test.ts b/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.test.ts deleted file mode 100644 index cc78e3423555..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_mongodb.test.ts +++ /dev/null @@ -1,200 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect } from "@jest/globals"; -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "@langchain/core/utils/testing"; -import { AzureCosmosDBMongoDBVectorStore } from "../azure_cosmosdb_mongodb.js"; - -// Mock mongodb client -const createMockClient = () => ({ - db: jest.fn().mockReturnValue({ - collectionName: "documents", - collection: jest.fn().mockReturnValue({ - listIndexes: jest.fn().mockReturnValue({ - toArray: jest.fn().mockReturnValue([ - { - name: "vectorSearchIndex", - }, - ]), - }), - dropIndex: jest.fn(), - deleteMany: jest.fn(), - insertMany: jest.fn().mockImplementation((docs: any) => ({ - insertedIds: docs.map((_: any, i: any) => `id${i}`), - })), - aggregate: jest.fn().mockReturnValue({ - map: jest.fn().mockReturnValue({ - toArray: jest - .fn() - .mockReturnValue([ - [new Document({ pageContent: "test", metadata: { a: 1 } }), 0.5], - ]), - }), - }), - }), - command: jest.fn(), - }), - connect: jest.fn(), - close: jest.fn(), -}); - -const embedMock = jest.spyOn(FakeEmbeddings.prototype, "embedDocuments"); - -beforeEach(() => { - embedMock.mockClear(); -}); - -test("AzureCosmosDBMongoDBVectorStore works", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = new AzureCosmosDBMongoDBVectorStore(embeddings, { - client: client as any, - }); - - expect(store).toBeDefined(); - - await store.addDocuments([ - { - pageContent: "test", - metadata: { a: 1 }, - }, - ]); - - const mockCollection = client.db().collection(); - - expect(mockCollection.insertMany).toHaveBeenCalledTimes(1); - expect(embedMock).toHaveBeenCalledTimes(1); - - const results = await store.similaritySearch("test", 1); - - expect(mockCollection.aggregate).toHaveBeenCalledTimes(1); - expect(results).toHaveLength(1); -}); - -test("AzureCosmosDBMongoDBVectorStore manages its index", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = new AzureCosmosDBMongoDBVectorStore(embeddings, { - client: client as any, - }); - - const indexExists = await store.checkIndexExists(); - - const mockDb = client.db(); - const mockCollection = mockDb.collection(); - - expect(mockDb.command).toHaveBeenCalledTimes(1); - expect(mockCollection.listIndexes).toHaveBeenCalledTimes(1); - expect(indexExists).toBe(true); - - await store.deleteIndex(); - - expect(mockCollection.dropIndex).toHaveBeenCalledTimes(1); -}); - -test("AzureCosmosDBMongoDBVectorStore deletes documents", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = new AzureCosmosDBMongoDBVectorStore(embeddings, { - client: client as any, - }); - - await store.delete(); - - const mockCollection = client.db().collection(); - expect(mockCollection.deleteMany).toHaveBeenCalledTimes(1); - expect(mockCollection.deleteMany).toHaveBeenCalledWith({}); - - await store.delete({ - ids: ["123456789012345678901234", "123456789012345678901235"], - }); - - expect(mockCollection.deleteMany).toHaveBeenCalledTimes(2); - expect(mockCollection.deleteMany.mock.calls[1][0]).toMatchObject({ _id: {} }); - - await store.delete({ filter: { a: 1 } }); - - expect(mockCollection.deleteMany).toHaveBeenCalledTimes(3); - expect(mockCollection.deleteMany.mock.calls[2][0]).toMatchObject({ a: 1 }); -}); - -test("AzureCosmosDBMongoDBVectorStore adds vectors", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = new AzureCosmosDBMongoDBVectorStore(embeddings, { - client: client as any, - }); - - await store.addVectors( - [[1, 2, 5]], - [ - { - pageContent: "test", - metadata: { a: 1 }, - }, - ] - ); - - const mockCollection = client.db().collection(); - expect(embedMock).toHaveBeenCalledTimes(0); - expect(mockCollection.insertMany).toHaveBeenCalledTimes(1); -}); - -test("AzureCosmosDBMongoDBVectorStore initializes from texts", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = await AzureCosmosDBMongoDBVectorStore.fromTexts( - ["test", "hello", "world"], - {}, - embeddings, - { client: client as any } - ); - - expect(store).toBeDefined(); - - const mockCollection = client.db().collection(); - expect(mockCollection.insertMany).toHaveBeenCalledTimes(1); - expect(mockCollection.insertMany).toHaveBeenCalledWith([ - { - textContent: "test", - vectorContent: [0.1, 0.2, 0.3, 0.4], - }, - { - textContent: "hello", - vectorContent: [0.1, 0.2, 0.3, 0.4], - }, - { - textContent: "world", - vectorContent: [0.1, 0.2, 0.3, 0.4], - }, - ]); - expect(embedMock).toHaveBeenCalledTimes(1); -}); - -test("AzureCosmosDBMongoDBVectorStore initializes from documents", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const store = await AzureCosmosDBMongoDBVectorStore.fromDocuments( - [ - new Document({ pageContent: "house" }), - new Document({ pageContent: "pool" }), - ], - embeddings, - { client: client as any } - ); - - expect(store).toBeDefined(); - - const mockCollection = client.db().collection(); - expect(mockCollection.insertMany).toHaveBeenCalledTimes(1); - expect(mockCollection.insertMany).toHaveBeenCalledWith([ - { - textContent: "house", - vectorContent: [0.1, 0.2, 0.3, 0.4], - }, - { - textContent: "pool", - vectorContent: [0.1, 0.2, 0.3, 0.4], - }, - ]); - expect(embedMock).toHaveBeenCalledTimes(1); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.int.test.ts deleted file mode 100644 index efd284ea1c38..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.int.test.ts +++ /dev/null @@ -1,349 +0,0 @@ -/* eslint-disable no-process-env */ - -import { test, expect } from "@jest/globals"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { CosmosClient } from "@azure/cosmos"; - -import { DefaultAzureCredential } from "@azure/identity"; -import { AzureCosmosDBNoSQLVectorStore } from "../azure_cosmosdb_nosql.js"; - -const DATABASE_NAME = "langchainTestDB"; -const CONTAINER_NAME = "testContainer"; - -/* - * To run this test, you need have an Azure Cosmos DB for NoSQL instance - * running. You can deploy a free version on Azure Portal without any cost, - * following this guide: - * https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search - * - * You do not need to create a database or collection, it will be created - * automatically by the test. - * - * Once you have the instance running, you need to set the following environment - * variables before running the test: - * - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT - * - AZURE_OPENAI_API_KEY - * - AZURE_OPENAI_API_INSTANCE_NAME - * - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME - * - AZURE_OPENAI_API_VERSION - * - * A regular OpenAI key can also be used instead of Azure OpenAI. - */ -describe("AzureCosmosDBNoSQLVectorStore", () => { - beforeEach(async () => { - // Note: when using Azure OpenAI, you have to also set these variables - // in addition to the API key: - // - AZURE_OPENAI_API_INSTANCE_NAME - // - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME - // - AZURE_OPENAI_API_VERSION - expect( - process.env.OPENAI_API_KEY || process.env.AZURE_OPENAI_API_KEY - ).toBeDefined(); - - let client: CosmosClient; - - if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) { - client = new CosmosClient( - process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING - ); - } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) { - client = new CosmosClient({ - endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT, - aadCredentials: new DefaultAzureCredential(), - }); - } else { - throw new Error( - "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT" - ); - } - - // Make sure the database does not exists - try { - await client.database(DATABASE_NAME).delete(); - } catch { - // Ignore error if the database does not exist - } - }); - - test("performs similarity search", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - expect(vectorStore).toBeDefined(); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { pageContent: "Cats sleeps a lot.", metadata: { b: 1 } }, - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - - const results = await vectorStore.similaritySearch("sandwich", 1); - - expect(results.length).toEqual(1); - expect(results).toMatchObject([ - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - ]); - - const retriever = vectorStore.asRetriever({}); - - const docs = await retriever.invoke("house"); - expect(docs).toBeDefined(); - expect(docs[0]).toMatchObject({ - pageContent: "The house is open", - metadata: { d: 1, e: 2 }, - }); - }); - - test("performs max marginal relevance search", async () => { - const texts = ["foo", "foo", "fox"]; - const vectorStore = await AzureCosmosDBNoSQLVectorStore.fromTexts( - texts, - {}, - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - const output = await vectorStore.maxMarginalRelevanceSearch("foo", { - k: 10, - fetchK: 20, - lambda: 0.1, - }); - - expect(output).toHaveLength(texts.length); - - const actual = output.map((doc) => doc.pageContent); - const expected = ["foo", "fox", "foo"]; - expect(actual).toEqual(expected); - - const standardRetriever = await vectorStore.asRetriever(); - - const standardRetrieverOutput = await standardRetriever.invoke("foo"); - expect(output).toHaveLength(texts.length); - - const standardRetrieverActual = standardRetrieverOutput.map( - (doc) => doc.pageContent - ); - const standardRetrieverExpected = ["foo", "foo", "fox"]; - expect(standardRetrieverActual).toEqual(standardRetrieverExpected); - - const retriever = await vectorStore.asRetriever({ - searchType: "mmr", - searchKwargs: { - fetchK: 20, - lambda: 0.1, - }, - }); - - const retrieverOutput = await retriever.invoke("foo"); - expect(output).toHaveLength(texts.length); - - const retrieverActual = retrieverOutput.map((doc) => doc.pageContent); - const retrieverExpected = ["foo", "fox", "foo"]; - expect(retrieverActual).toEqual(retrieverExpected); - - const similarity = await vectorStore.similaritySearchWithScore("foo", 1); - expect(similarity.length).toBe(1); - }); - - test("performs similarity search with filter", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - expect(vectorStore).toBeDefined(); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { pageContent: "Cats sleeps a lot.", metadata: { b: 1 } }, - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - - const results = await vectorStore.similaritySearch("sandwich", 1, { - filterClause: "WHERE c.metadata.d = 1", - }); - - expect(results.length).toEqual(1); - expect(results).toMatchObject([ - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - }); - - test("performs similarity search including vectors in the results", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - expect(vectorStore).toBeDefined(); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { pageContent: "Cats sleeps a lot.", metadata: { b: 1 } }, - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - - const results: Document[] = await vectorStore.similaritySearch( - "sandwich", - 1, - { includeEmbeddings: true } - ); - - expect(results.length).toEqual(1); - expect(results).toMatchObject([ - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - ]); - expect(results[0].metadata.vector).toBeDefined(); - }); - - test("deletes documents by id", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - const ids = await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { - pageContent: "The is the house of parliament", - metadata: { d: 1, e: 2 }, - }, - ]); - - // Delete document matching specified ids - await vectorStore.delete({ ids: ids.slice(0, 1) }); - - const results = await vectorStore.similaritySearch("politics", 10); - - expect(results.length).toEqual(1); - expect(results[0].pageContent).toEqual("The is the house of parliament"); - }); - - test("deletes documents by filter", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { - pageContent: "The is the house of parliament", - metadata: { d: 1, e: 2 }, - }, - ]); - - // Delete document matching the filter - await vectorStore.delete({ - filter: { - query: "SELECT * FROM c WHERE c.metadata.a = @value", - parameters: [{ name: "@value", value: 1 }], - }, - }); - - const results = await vectorStore.similaritySearch("politics", 10); - - expect(results.length).toEqual(1); - expect(results[0].pageContent).toEqual("The is the house of parliament"); - }); - - test("deletes all documents", async () => { - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - const documents = Array.from({ length: 101 }, (_, i) => ({ - pageContent: `Document ${i}`, - metadata: { a: i }, - })); - - await vectorStore.addDocuments(documents); - - // Delete all documents - await vectorStore.delete(); - - const results = await vectorStore.similaritySearch("document", 10); - - expect(results.length).toEqual(0); - }); - - test("connect using managed identity", async () => { - // First initialize using a regular connection string - // to create the database and container, as managed identity - // with RBAC does not have permission to create them. - const vectorStoreCS = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - await vectorStoreCS.addDocuments([{ pageContent: "init", metadata: {} }]); - - const connectionString = process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING; - if (connectionString) { - // Remove the connection string to test managed identity - process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING = ""; - } - - expect(process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING).toBeFalsy(); - expect(process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT).toBeDefined(); - - const vectorStore = new AzureCosmosDBNoSQLVectorStore( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - } - ); - - expect(vectorStore).toBeDefined(); - - await vectorStore.addDocuments([ - { pageContent: "This book is about politics", metadata: { a: 1 } }, - { pageContent: "Cats sleeps a lot.", metadata: { b: 1 } }, - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - { pageContent: "The house is open", metadata: { d: 1, e: 2 } }, - ]); - - const results = await vectorStore.similaritySearch("sandwich", 1); - - expect(results.length).toEqual(1); - expect(results).toMatchObject([ - { pageContent: "Sandwiches taste good.", metadata: { c: 1 } }, - ]); - - if (connectionString) { - // Restore the connection string - process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING = connectionString; - } - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.test.ts b/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.test.ts deleted file mode 100644 index b761e9f470fd..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/azure_cosmosdb_nosql.test.ts +++ /dev/null @@ -1,224 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect } from "@jest/globals"; -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "@langchain/core/utils/testing"; - -import { AzureCosmosDBNoSQLVectorStore } from "../azure_cosmosdb_nosql.js"; - -const embedMock = jest.spyOn(FakeEmbeddings.prototype, "embedDocuments"); - -const createMockClient = () => { - let id = 0; - const client = { - databases: { - createIfNotExists: jest.fn().mockReturnThis(), - get database() { - return this; - }, - containers: { - createIfNotExists: jest.fn().mockReturnThis(), - get container() { - return this; - }, - items: { - create: jest.fn().mockImplementation((doc: any) => ({ - // eslint-disable-next-line no-plusplus - resource: { id: doc.id ?? `${id++}` }, - })), - query: jest.fn().mockReturnThis(), - fetchAll: jest.fn().mockImplementation(() => ({ - resources: Array(id) - .fill(0) - .map((_, i) => ({ id: i })), - })), - }, - item: jest.fn().mockReturnThis(), - delete: jest.fn(), - }, - }, - }; - return client; -}; - -const createDocuments = (n: number) => { - const documents = []; - for (let i = 0; i < n; i += 1) { - documents.push({ - pageContent: `hello ${i}`, - metadata: { - source: `doc-${i}`, - attributes: [], - }, - }); - } - return documents; -}; - -beforeEach(() => { - embedMock.mockClear(); -}); - -test("AzureCosmosDBNoSQLVectorStore addVectors should store documents", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = new AzureCosmosDBNoSQLVectorStore(embeddings, { - client: client as any, - }); - - expect(store).toBeDefined(); - - const documents = createDocuments(1500); - const vectors: number[][] = []; - - for (const doc of documents) { - vectors.push(await embeddings.embedQuery(doc.pageContent)); - } - - await store.addVectors(vectors, documents); - - expect(client.databases.containers.items.create).toHaveBeenCalledTimes(1500); -}); - -test("AzureCosmosDBNoSQLVectorStore addDocuments should embed and store documents", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = new AzureCosmosDBNoSQLVectorStore(embeddings, { - client: client as any, - }); - - expect(store).toBeDefined(); - - const documents = createDocuments(1500); - await store.addDocuments(documents); - - expect(embedMock).toHaveBeenCalledTimes(1); - expect(client.databases.containers.items.create).toHaveBeenCalledTimes(1500); -}); - -test("AzureCosmosDBNoSQLVectorStore addDocuments should use specified IDs", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = new AzureCosmosDBNoSQLVectorStore(embeddings, { - client: client as any, - }); - - expect(store).toBeDefined(); - - const result = await store.addDocuments([ - { - pageContent: "hello", - metadata: { - source: "test", - attributes: [], - }, - id: "id1", - }, - { - pageContent: "hello2", - metadata: { - source: "test", - attributes: [], - }, - id: "id2", - }, - ]); - - expect(client.databases.containers.items.create).toHaveBeenCalledTimes(2); - expect(result).toEqual(["id1", "id2"]); -}); - -test("AzureCosmosDBNoSQLVectorStore deletes documents", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = new AzureCosmosDBNoSQLVectorStore(embeddings, { - client: client as any, - }); - - const documents = createDocuments(10); - await store.addDocuments(documents); - - await store.delete(); - - expect(client.databases.containers.delete).toHaveBeenCalledTimes(10); - - await store.delete({ ids: ["0", "1"] }); - - expect(client.databases.containers.delete).toHaveBeenCalledTimes(12); - - await store.delete({ filter: "SELECT * FROM c" }); - - expect(client.databases.containers.delete).toHaveBeenCalledTimes(22); -}); - -test("AzureCosmosDBNoSQLVectorStore initializes from texts", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = await AzureCosmosDBNoSQLVectorStore.fromTexts( - ["test", "hello", "world"], - {}, - embeddings, - { client: client as any } - ); - - expect(store).toBeDefined(); - - expect(client.databases.containers.items.create).toHaveBeenCalledTimes(3); - expect(client.databases.containers.items.create.mock.calls).toEqual([ - [ - { - text: "test", - vector: [0.1, 0.2, 0.3, 0.4], - metadata: {}, - }, - ], - [ - { - text: "hello", - vector: [0.1, 0.2, 0.3, 0.4], - metadata: {}, - }, - ], - [ - { - text: "world", - vector: [0.1, 0.2, 0.3, 0.4], - metadata: {}, - }, - ], - ]); - expect(embedMock).toHaveBeenCalledTimes(1); -}); - -test("AzureCosmosDBNoSQLVectorStore initializes from documents", async () => { - const embeddings = new FakeEmbeddings(); - const client = createMockClient(); - const store = await AzureCosmosDBNoSQLVectorStore.fromDocuments( - [ - new Document({ pageContent: "house" }), - new Document({ pageContent: "pool" }), - ], - embeddings, - { client: client as any } - ); - - expect(store).toBeDefined(); - - expect(client.databases.containers.items.create).toHaveBeenCalledTimes(2); - expect(client.databases.containers.items.create.mock.calls).toEqual([ - [ - { - text: "house", - vector: [0.1, 0.2, 0.3, 0.4], - metadata: {}, - }, - ], - [ - { - text: "pool", - vector: [0.1, 0.2, 0.3, 0.4], - metadata: {}, - }, - ], - ]); - expect(embedMock).toHaveBeenCalledTimes(1); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.int.test.ts deleted file mode 100644 index 03fb49e59de5..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.int.test.ts +++ /dev/null @@ -1,136 +0,0 @@ -/* eslint-disable no-nested-ternary */ -/* eslint-disable @typescript-eslint/no-explicit-any */ -/* eslint-disable no-process-env */ -import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai"; -import { MongoClient } from "mongodb"; -import { AzureCosmosDBMongoDBSemanticCache } from "../../caches/caches_mongodb.js"; -import { - AzureCosmosDBMongoDBIndexOptions, - AzureCosmosDBMongoDBSimilarityType, -} from "../../azure_cosmosdb_mongodb.js"; - -const DATABASE_NAME = "langchain"; -const COLLECTION_NAME = "test"; - -async function initializeCache( - indexType: any, - distanceFunction: any, - similarityThreshold: number = 0.6 -): Promise { - const embeddingModel = new OpenAIEmbeddings(); - const testEmbedding = await embeddingModel.embedDocuments(["sample text"]); - const dimension = testEmbedding[0].length; - - const indexOptions: AzureCosmosDBMongoDBIndexOptions = { - indexType, - // eslint-disable-next-line no-nested-ternary - similarity: - distanceFunction === "cosine" - ? AzureCosmosDBMongoDBSimilarityType.COS - : distanceFunction === "euclidean" - ? AzureCosmosDBMongoDBSimilarityType.L2 - : AzureCosmosDBMongoDBSimilarityType.IP, - dimensions: dimension, - }; - - let cache: AzureCosmosDBMongoDBSemanticCache; - - const connectionString = process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING; - if (connectionString) { - cache = new AzureCosmosDBMongoDBSemanticCache( - embeddingModel, - { - databaseName: DATABASE_NAME, - collectionName: COLLECTION_NAME, - connectionString, - indexOptions, - }, - similarityThreshold - ); - } else { - throw new Error( - "Please set the environment variable AZURE_COSMOSDB_MONGODB_CONNECTION_STRING" - ); - } - - return cache; -} - -describe("AzureCosmosDBMongoDBSemanticCache", () => { - beforeEach(async () => { - const connectionString = - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING; - const client = new MongoClient(connectionString!); - - try { - await client.db(DATABASE_NAME).collection(COLLECTION_NAME).drop(); - } catch (error) { - throw new Error("Please set collection name here"); - } - }); - - it("should store and retrieve cache using cosine similarity with ivf index", async () => { - const cache = await initializeCache("ivf", "cosine"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("should store and retrieve cache using euclidean similarity with hnsw index", async () => { - const cache = await initializeCache("hnsw", "euclidean"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("should return null if similarity score is below threshold (cosine similarity with ivf index)", async () => { - const cache = await initializeCache("ivf", "cosine", 0.8); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - const cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - const resultBelowThreshold = await cache.lookup("bar", llmString); - expect(resultBelowThreshold).toEqual(null); - - await cache.clear(llmString); - }); - - it("should handle a variety of cache updates and lookups", async () => { - const cache = await initializeCache("ivf", "cosine", 0.7); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - - await cache.update("test1", llmString, [{ text: "response 1" }]); - await cache.update("test2", llmString, [{ text: "response 2" }]); - - let cacheOutput = await cache.lookup("test1", llmString); - expect(cacheOutput).toEqual([{ text: "response 1" }]); - - cacheOutput = await cache.lookup("test2", llmString); - expect(cacheOutput).toEqual([{ text: "response 2" }]); - - cacheOutput = await cache.lookup("test3", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.test.ts deleted file mode 100644 index 4bd9cf0ce996..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_mongodb.test.ts +++ /dev/null @@ -1,72 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -// eslint-disable-next-line import/no-extraneous-dependencies -import { jest } from "@jest/globals"; -import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing"; -import { Document } from "@langchain/core/documents"; -import { MongoClient } from "mongodb"; -import { AzureCosmosDBMongoDBSemanticCache } from "../../index.js"; - -const createMockClient = () => ({ - db: jest.fn().mockReturnValue({ - collectionName: "documents", - collection: jest.fn().mockReturnValue({ - listIndexes: jest.fn().mockReturnValue({ - toArray: jest.fn().mockReturnValue([ - { - name: "vectorSearchIndex", - }, - ]), - }), - findOne: jest.fn().mockReturnValue({ - metadata: { - return_value: ['{"text": "fizz"}'], - }, - similarityScore: 0.8, - }), - insertMany: jest.fn().mockImplementation((docs: any) => ({ - insertedIds: docs.map((_: any, i: any) => `id${i}`), - })), - aggregate: jest.fn().mockReturnValue({ - map: jest.fn().mockReturnValue({ - toArray: jest.fn().mockReturnValue([ - [ - new Document({ - pageContent: "test", - metadata: { return_value: ['{"text": "fizz"}'] }, - }), - 0.8, - ], - ]), - }), - }), - }), - command: jest.fn(), - }), - connect: jest.fn(), - close: jest.fn(), -}); - -describe("AzureCosmosDBMongoDBSemanticCache", () => { - it("should store, retrieve, and clear cache in MongoDB", async () => { - const mockClient = createMockClient() as any; - const embeddings = new FakeEmbeddings(); - const cache = new AzureCosmosDBMongoDBSemanticCache( - embeddings, - { - client: mockClient as MongoClient, - }, - 0.8 - ); - - expect(cache).toBeDefined(); - - const llm = new FakeLLM({}); - const llmString = JSON.stringify(llm._identifyingParams()); - - await cache.update("foo", llmString, [{ text: "fizz" }]); - expect(mockClient.db().collection().insertMany).toHaveBeenCalled(); - - const result = await cache.lookup("foo", llmString); - expect(result).toEqual([{ text: "fizz" }]); - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.int.test.ts deleted file mode 100644 index 3bd064acbfbc..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.int.test.ts +++ /dev/null @@ -1,245 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-explicit-any */ - -import { - CosmosClient, - IndexingMode, - VectorEmbedding, - VectorEmbeddingPolicy, -} from "@azure/cosmos"; -import { DefaultAzureCredential } from "@azure/identity"; -import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai"; -import { AzureCosmosDBNoSQLSemanticCache } from "../../caches/caches_nosql.js"; - -const DATABASE_NAME = "langchainTestCacheDB"; -const CONTAINER_NAME = "testContainer"; - -function indexingPolicy(indexType: any) { - return { - indexingMode: IndexingMode.consistent, - includedPaths: [{ path: "/*" }], - excludedPaths: [{ path: '/"_etag"/?' }], - vectorIndexes: [{ path: "/embedding", type: indexType }], - }; -} - -function vectorEmbeddingPolicy( - distanceFunction: "euclidean" | "cosine" | "dotproduct", - dimension: number -): VectorEmbeddingPolicy { - return { - vectorEmbeddings: [ - { - path: "/embedding", - dataType: "float32", - distanceFunction, - dimensions: dimension, - } as VectorEmbedding, - ], - }; -} - -async function initializeCache( - indexType: any, - distanceFunction: any, - similarityThreshold?: number -): Promise { - let cache: AzureCosmosDBNoSQLSemanticCache; - const embeddingModel = new OpenAIEmbeddings(); - const testEmbedding = await embeddingModel.embedDocuments(["sample text"]); - const dimension = Math.min( - testEmbedding[0].length, - indexType === "flat" ? 505 : 4096 - ); - if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) { - cache = new AzureCosmosDBNoSQLSemanticCache( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - connectionString: process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING, - indexingPolicy: indexingPolicy(indexType), - vectorEmbeddingPolicy: vectorEmbeddingPolicy( - distanceFunction, - dimension - ), - }, - similarityThreshold - ); - } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) { - cache = new AzureCosmosDBNoSQLSemanticCache( - new OpenAIEmbeddings(), - { - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT, - indexingPolicy: indexingPolicy(indexType), - vectorEmbeddingPolicy: vectorEmbeddingPolicy( - distanceFunction, - dimension - ), - }, - similarityThreshold - ); - } else { - throw new Error( - "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT" - ); - } - return cache; -} - -/* - * To run this test, you need have an Azure Cosmos DB for NoSQL instance - * running. You can deploy a free version on Azure Portal without any cost, - * following this guide: - * https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search - * - * You do not need to create a database or collection, it will be created - * automatically by the test. - * - * Once you have the instance running, you need to set the following environment - * variables before running the test: - * - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT - * - AZURE_OPENAI_API_KEY - * - AZURE_OPENAI_API_INSTANCE_NAME - * - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME - * - AZURE_OPENAI_API_VERSION - */ -describe("Azure CosmosDB NoSQL Semantic Cache", () => { - beforeEach(async () => { - let client: CosmosClient; - - if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) { - client = new CosmosClient( - process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING - ); - } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) { - client = new CosmosClient({ - endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT, - aadCredentials: new DefaultAzureCredential(), - }); - } else { - throw new Error( - "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT" - ); - } - - // Make sure the database does not exists - try { - await client.database(DATABASE_NAME).delete(); - } catch { - // Ignore error if the database does not exist - } - }); - - it("test AzureCosmosDBNoSqlSemanticCache with cosine quantizedFlat", async () => { - const cache = await initializeCache("quantizedFlat", "cosine"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache with cosine flat", async () => { - const cache = await initializeCache("flat", "cosine"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache with dotProduct quantizedFlat", async () => { - const cache = await initializeCache("quantizedFlat", "dotproduct"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache with dotProduct flat", async () => { - const cache = await initializeCache("flat", "cosine"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache with euclidean quantizedFlat", async () => { - const cache = await initializeCache("quantizedFlat", "euclidean"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache with euclidean flat", async () => { - const cache = await initializeCache("flat", "euclidean"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const llmString = JSON.stringify(model._identifyingParams); - await cache.update("foo", llmString, [{ text: "fizz" }]); - - let cacheOutput = await cache.lookup("foo", llmString); - expect(cacheOutput).toEqual([{ text: "fizz" }]); - - cacheOutput = await cache.lookup("bar", llmString); - expect(cacheOutput).toEqual(null); - - await cache.clear(llmString); - }); - - it("test AzureCosmosDBNoSqlSemanticCache response according to similarity score", async () => { - const cache = await initializeCache("quantizedFlat", "cosine"); - const model = new ChatOpenAI({ model: "gpt-4o-mini", cache }); - const response1 = await model.invoke( - "Where is the headquarter of Microsoft?" - ); - console.log(response1.content); - // gives similarity score of 0.56 which is less than the threshold of 0.6. The cache - // will retun null which will allow the model to generate result. - const response2 = await model.invoke( - "List all Microsoft offices in India." - ); - expect(response2.content).not.toEqual(response1.content); - console.log(response2.content); - // gives similarity score of .63 > 0.6 - const response3 = await model.invoke("Tell me something about Microsoft"); - expect(response3.content).toEqual(response1.content); - console.log(response3.content); - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.test.ts b/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.test.ts deleted file mode 100644 index 3a7a253f22bc..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/caches/caches_nosql.test.ts +++ /dev/null @@ -1,67 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest } from "@jest/globals"; -import { FakeEmbeddings, FakeLLM } from "@langchain/core/utils/testing"; -import { AzureCosmosDBNoSQLSemanticCache } from "../../index.js"; - -// Create the mock Cosmos DB client -const createMockClient = () => { - let id = 0; - const client = { - databases: { - createIfNotExists: jest.fn().mockReturnThis(), - get database() { - return this; - }, - containers: { - createIfNotExists: jest.fn().mockReturnThis(), - get container() { - return this; - }, - items: { - create: jest.fn().mockImplementation((doc: any) => ({ - // eslint-disable-next-line no-plusplus - resource: { id: doc.id ?? `${id++}` }, - })), - query: jest.fn().mockReturnThis(), - fetchAll: jest.fn().mockImplementation(() => ({ - resources: [ - { - metadata: { - return_value: ['{"text": "fizz"}'], // Simulate stored serialized generation - }, - similarityScore: 0.8, - }, - ], - })), - }, - item: jest.fn().mockReturnThis(), - delete: jest.fn(), - }, - }, - }; - return client; -}; - -describe("AzureCosmosDBNoSQLSemanticCache", () => { - it("should store, retrieve, and clear cache", async () => { - const client = createMockClient(); - const embeddings = new FakeEmbeddings(); - const cache = new AzureCosmosDBNoSQLSemanticCache(embeddings, { - client: client as any, - }); - expect(cache).toBeDefined(); - - const llm = new FakeLLM({}); - const llmString = JSON.stringify(llm._identifyingParams()); - - await cache.update("foo", llmString, [{ text: "fizz" }]); - expect(client.databases.containers.items.create).toHaveBeenCalled(); - - const result = await cache.lookup("foo", llmString); - expect(result).toEqual([{ text: "fizz" }]); - expect(client.databases.containers.items.query).toHaveBeenCalled(); - - await cache.clear(llmString); - expect(client.databases.containers.delete).toHaveBeenCalled(); - }); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/chat_histories/mongodb.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/chat_histories/mongodb.int.test.ts deleted file mode 100644 index 2825b2cafab4..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/chat_histories/mongodb.int.test.ts +++ /dev/null @@ -1,146 +0,0 @@ -/* eslint-disable no-process-env */ - -import { MongoClient, ObjectId } from "mongodb"; -import { AIMessage, HumanMessage } from "@langchain/core/messages"; -import { - AzureCosmosDBMongoChatMessageHistory, - AzureCosmosDBMongoChatHistoryDBConfig, -} from "../../chat_histories/mongodb.js"; - -afterAll(async () => { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const client = new MongoClient( - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING! - ); - await client.connect(); - await client.db("langchain").dropDatabase(); - await client.close(); -}); - -test("Test Azure Cosmos MongoDB history store", async () => { - expect(process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING).toBeDefined(); - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const mongoClient = new MongoClient( - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING! - ); - const dbcfg: AzureCosmosDBMongoChatHistoryDBConfig = { - client: mongoClient, - connectionString: process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING, - databaseName: "langchain", - collectionName: "chathistory", - }; - - const sessionId = new ObjectId().toString(); - const userId = new ObjectId().toString(); - const chatHistory = new AzureCosmosDBMongoChatMessageHistory( - dbcfg, - sessionId, - userId - ); - - const blankResult = await chatHistory.getMessages(); - expect(blankResult).toStrictEqual([]); - - await chatHistory.addUserMessage("Who is the best vocalist?"); - await chatHistory.addAIChatMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - - const resultWithHistory = await chatHistory.getMessages(); - console.log(resultWithHistory); - expect(resultWithHistory).toEqual(expectedMessages); - - await mongoClient.close(); -}); - -test("Test clear Azure Cosmos MongoDB history store", async () => { - expect(process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING).toBeDefined(); - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const mongoClient = new MongoClient( - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING! - ); - const dbcfg: AzureCosmosDBMongoChatHistoryDBConfig = { - client: mongoClient, - connectionString: process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING, - databaseName: "langchain", - collectionName: "chathistory", - }; - - const sessionId = new ObjectId().toString(); - const userId = new ObjectId().toString(); - const chatHistory = new AzureCosmosDBMongoChatMessageHistory( - dbcfg, - sessionId, - userId - ); - - await chatHistory.addUserMessage("Who is the best vocalist?"); - await chatHistory.addAIChatMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - - const resultWithHistory = await chatHistory.getMessages(); - expect(resultWithHistory).toEqual(expectedMessages); - - await chatHistory.clear(); - - const blankResult = await chatHistory.getMessages(); - expect(blankResult).toStrictEqual([]); - - await mongoClient.close(); -}); - -test("Test getAllSessions and clearAllSessions", async () => { - expect(process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING).toBeDefined(); - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const mongoClient = new MongoClient( - process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING! - ); - const dbcfg: AzureCosmosDBMongoChatHistoryDBConfig = { - client: mongoClient, - connectionString: process.env.AZURE_COSMOSDB_MONGODB_CONNECTION_STRING, - databaseName: "langchain", - collectionName: "chathistory", - }; - - const sessionId1 = new ObjectId().toString(); - const userId1 = new ObjectId().toString(); - const sessionId2 = new ObjectId().toString(); - const userId2 = new ObjectId().toString(); - - const chatHistory1 = new AzureCosmosDBMongoChatMessageHistory( - dbcfg, - sessionId1, - userId1 - ); - const chatHistory2 = new AzureCosmosDBMongoChatMessageHistory( - dbcfg, - sessionId2, - userId2 - ); - - await chatHistory1.addUserMessage("What is AI?"); - await chatHistory1.addAIChatMessage("AI stands for Artificial Intelligence."); - await chatHistory2.addUserMessage("What is the best programming language?"); - await chatHistory2.addAIChatMessage("It depends on the use case."); - - const allSessions = await chatHistory1.getAllSessions(); - expect(allSessions.length).toBe(2); - expect(allSessions[0].id).toBe(sessionId1); - expect(allSessions[1].id).toBe(sessionId2); - - await chatHistory1.clearAllSessions(); - const clearedSessions = await chatHistory1.getAllSessions(); - expect(clearedSessions.length).toBe(0); - - await mongoClient.close(); -}); diff --git a/libs/langchain-azure-cosmosdb/src/tests/chat_histories/nosql.int.test.ts b/libs/langchain-azure-cosmosdb/src/tests/chat_histories/nosql.int.test.ts deleted file mode 100644 index 9a6b12b125b6..000000000000 --- a/libs/langchain-azure-cosmosdb/src/tests/chat_histories/nosql.int.test.ts +++ /dev/null @@ -1,199 +0,0 @@ -/* eslint-disable no-promise-executor-return */ -/* eslint-disable no-process-env */ - -import { expect } from "@jest/globals"; -import { HumanMessage, AIMessage } from "@langchain/core/messages"; -import { CosmosClient } from "@azure/cosmos"; -import { DefaultAzureCredential } from "@azure/identity"; -import { ObjectId } from "mongodb"; -import { AzureCosmsosDBNoSQLChatMessageHistory } from "../../chat_histories/nosql.js"; - -const DATABASE_NAME = "langchainTestDB"; -const CONTAINER_NAME = "testContainer"; - -/* - * To run this test, you need have an Azure Cosmos DB for NoSQL instance - * running. You can deploy a free version on Azure Portal without any cost, - * following this guide: - * https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search - * - * You do not need to create a database or collection, it will be created - * automatically by the test. - * - * Once you have the instance running, you need to set the following environment - * variables before running the test: - * - AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT - */ -beforeEach(async () => { - let client: CosmosClient; - - if (process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING) { - client = new CosmosClient( - process.env.AZURE_COSMOSDB_NOSQL_CONNECTION_STRING - ); - } else if (process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT) { - client = new CosmosClient({ - endpoint: process.env.AZURE_COSMOSDB_NOSQL_ENDPOINT, - aadCredentials: new DefaultAzureCredential(), - }); - } else { - throw new Error( - "Please set the environment variable AZURE_COSMOSDB_NOSQL_CONNECTION_STRING or AZURE_COSMOSDB_NOSQL_ENDPOINT" - ); - } - try { - await client.database(DATABASE_NAME).delete(); - } catch { - // Ignore error if the database does not exist - } - try { - await client.database("DbWithTTL").delete(); - } catch { - // Ignore error if the database does not exist - } -}); - -test("Test CosmosDB History Store", async () => { - const input = { - sessionId: new ObjectId().toString(), - userId: new ObjectId().toString(), - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - }; - const chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory(input); - const blankResult = await chatHistory.getMessages(); - expect(blankResult).toStrictEqual([]); - - await chatHistory.addUserMessage("Who is the best vocalist?"); - await chatHistory.addAIMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - const resultWithHistory = await chatHistory.getMessages(); - expect(resultWithHistory).toEqual(expectedMessages); -}); - -test("Test clear CosmosDB history Store", async () => { - const input = { - sessionId: new ObjectId().toString(), - userId: new ObjectId().toString(), - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - }; - const chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory(input); - - await chatHistory.addUserMessage("Who is the best vocalist?"); - await chatHistory.addAIMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - const resultWithHistory = await chatHistory.getMessages(); - expect(resultWithHistory).toEqual(expectedMessages); - - await chatHistory.clear(); - - const blankResult = await chatHistory.getMessages(); - expect(blankResult).toStrictEqual([]); -}); - -test("Test CosmosDB history with a TTL", async () => { - const input = { - sessionId: new ObjectId().toString(), - userId: new ObjectId().toString(), - databaseName: "DbWithTTL", - ttl: 5, - }; - const chatHistory = new AzureCosmsosDBNoSQLChatMessageHistory(input); - - await chatHistory.addUserMessage("Who is the best vocalist?"); - await chatHistory.addAIMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - const resultWithHistory = await chatHistory.getMessages(); - expect(resultWithHistory).toEqual(expectedMessages); - - await new Promise((resolve) => setTimeout(resolve, 6000)); - - const expiredResult = await chatHistory.getMessages(); - expect(expiredResult).toStrictEqual([]); -}); - -test("Test clear all sessions for a user", async () => { - const input1 = { - sessionId: new Date().toISOString(), - userId: "user1", - databaseName: "DbWithTTL", - ttl: 5, - }; - const chatHistory1 = new AzureCosmsosDBNoSQLChatMessageHistory(input1); - - await chatHistory1.addUserMessage("Who is the best vocalist?"); - await chatHistory1.addAIMessage("Ozzy Osbourne"); - - const input2 = { - sessionId: new Date().toISOString(), - userId: "user1", - databaseName: "DbWithTTL", - ttl: 5, - }; - const chatHistory2 = new AzureCosmsosDBNoSQLChatMessageHistory(input2); - - await chatHistory2.addUserMessage("Who is the best vocalist?"); - await chatHistory2.addAIMessage("Ozzy Osbourne"); - - const expectedMessages = [ - new HumanMessage("Who is the best vocalist?"), - new AIMessage("Ozzy Osbourne"), - ]; - - const result1 = await chatHistory1.getMessages(); - expect(result1).toEqual(expectedMessages); - - const result2 = await chatHistory1.getMessages(); - expect(result2).toEqual(expectedMessages); - - await chatHistory1.clearAllSessions(); - - const deletedResult1 = await chatHistory1.getMessages(); - const deletedResult2 = await chatHistory2.getMessages(); - expect(deletedResult1).toStrictEqual([]); - expect(deletedResult2).toStrictEqual([]); -}); - -test("Test set context and get all sessions for a user", async () => { - const session1 = { - userId: "user1", - databaseName: DATABASE_NAME, - containerName: CONTAINER_NAME, - sessionId: new ObjectId().toString(), - }; - const context1 = { title: "Best vocalist" }; - const chatHistory1 = new AzureCosmsosDBNoSQLChatMessageHistory(session1); - - await chatHistory1.setContext(context1); - await chatHistory1.addUserMessage("Who is the best vocalist?"); - await chatHistory1.addAIMessage("Ozzy Osbourne"); - - const chatHistory2 = new AzureCosmsosDBNoSQLChatMessageHistory({ - ...session1, - sessionId: new ObjectId().toString(), - }); - const context2 = { title: "Best guitarist" }; - - await chatHistory2.addUserMessage("Who is the best guitarist?"); - await chatHistory2.addAIMessage("Jimi Hendrix"); - await chatHistory2.setContext(context2); - - const sessions = await chatHistory1.getAllSessions(); - - expect(sessions.length).toBe(2); - expect(sessions[0].context).toEqual(context1); - expect(sessions[1].context).toEqual(context2); -}); diff --git a/libs/langchain-azure-cosmosdb/tsconfig.cjs.json b/libs/langchain-azure-cosmosdb/tsconfig.cjs.json deleted file mode 100644 index a67a84ea490e..000000000000 --- a/libs/langchain-azure-cosmosdb/tsconfig.cjs.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "./tsconfig.json", - "compilerOptions": { - "module": "commonjs", - "moduleResolution": "node10", - "declaration": false - }, - "exclude": ["node_modules", "dist", "docs", "**/tests"] -} diff --git a/libs/langchain-azure-cosmosdb/tsconfig.json b/libs/langchain-azure-cosmosdb/tsconfig.json deleted file mode 100644 index 5bf2c7b56562..000000000000 --- a/libs/langchain-azure-cosmosdb/tsconfig.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "extends": "@tsconfig/recommended", - "compilerOptions": { - "outDir": "../dist", - "rootDir": "./src", - "target": "ES2021", - "lib": ["ES2021", "ES2022.Object", "DOM"], - "module": "ES2020", - "moduleResolution": "bundler", - "esModuleInterop": true, - "declaration": true, - "noImplicitReturns": true, - "noFallthroughCasesInSwitch": true, - "noUnusedLocals": true, - "noUnusedParameters": true, - "useDefineForClassFields": true, - "strictPropertyInitialization": false, - "allowJs": true, - "strict": true - }, - "include": ["src/**/*"], - "exclude": ["node_modules", "dist", "docs"] -} diff --git a/libs/langchain-azure-dynamic-sessions/.env.example b/libs/langchain-azure-dynamic-sessions/.env.example deleted file mode 100644 index 688d8c864e65..000000000000 --- a/libs/langchain-azure-dynamic-sessions/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -# Azure Container App Session Pool Management Endpoint -AZURE_CONTAINER_APP_SESSION_POOL_MANAGEMENT_ENDPOINT= diff --git a/libs/langchain-azure-dynamic-sessions/.eslintrc.cjs b/libs/langchain-azure-dynamic-sessions/.eslintrc.cjs deleted file mode 100644 index 344f8a9d6cd9..000000000000 --- a/libs/langchain-azure-dynamic-sessions/.eslintrc.cjs +++ /dev/null @@ -1,66 +0,0 @@ -module.exports = { - extends: [ - "airbnb-base", - "eslint:recommended", - "prettier", - "plugin:@typescript-eslint/recommended", - ], - parserOptions: { - ecmaVersion: 12, - parser: "@typescript-eslint/parser", - project: "./tsconfig.json", - sourceType: "module", - }, - plugins: ["@typescript-eslint", "no-instanceof"], - ignorePatterns: [ - ".eslintrc.cjs", - "scripts", - "node_modules", - "dist", - "dist-cjs", - "*.js", - "*.cjs", - "*.d.ts", - ], - rules: { - "no-process-env": 2, - "no-instanceof/no-instanceof": 2, - "@typescript-eslint/explicit-module-boundary-types": 0, - "@typescript-eslint/no-empty-function": 0, - "@typescript-eslint/no-shadow": 0, - "@typescript-eslint/no-empty-interface": 0, - "@typescript-eslint/no-use-before-define": ["error", "nofunc"], - "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], - "@typescript-eslint/no-floating-promises": "error", - "@typescript-eslint/no-misused-promises": "error", - camelcase: 0, - "class-methods-use-this": 0, - "import/extensions": [2, "ignorePackages"], - "import/no-extraneous-dependencies": [ - "error", - { devDependencies: ["**/*.test.ts"] }, - ], - "import/no-unresolved": 0, - "import/prefer-default-export": 0, - "keyword-spacing": "error", - "max-classes-per-file": 0, - "max-len": 0, - "no-await-in-loop": 0, - "no-bitwise": 0, - "no-console": 0, - "no-restricted-syntax": 0, - "no-shadow": 0, - "no-continue": 0, - "no-void": 0, - "no-underscore-dangle": 0, - "no-use-before-define": 0, - "no-useless-constructor": 0, - "no-return-await": 0, - "consistent-return": 0, - "no-else-return": 0, - "func-names": 0, - "no-lonely-if": 0, - "prefer-rest-params": 0, - "new-cap": ["error", { properties: false, capIsNew: false }], - }, -}; diff --git a/libs/langchain-azure-dynamic-sessions/.gitignore b/libs/langchain-azure-dynamic-sessions/.gitignore deleted file mode 100644 index c10034e2f1be..000000000000 --- a/libs/langchain-azure-dynamic-sessions/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -index.cjs -index.js -index.d.ts -index.d.cts -node_modules -dist -.yarn diff --git a/libs/langchain-azure-dynamic-sessions/.prettierrc b/libs/langchain-azure-dynamic-sessions/.prettierrc deleted file mode 100644 index ba08ff04f677..000000000000 --- a/libs/langchain-azure-dynamic-sessions/.prettierrc +++ /dev/null @@ -1,19 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/prettierrc", - "printWidth": 80, - "tabWidth": 2, - "useTabs": false, - "semi": true, - "singleQuote": false, - "quoteProps": "as-needed", - "jsxSingleQuote": false, - "trailingComma": "es5", - "bracketSpacing": true, - "arrowParens": "always", - "requirePragma": false, - "insertPragma": false, - "proseWrap": "preserve", - "htmlWhitespaceSensitivity": "css", - "vueIndentScriptAndStyle": false, - "endOfLine": "lf" -} diff --git a/libs/langchain-azure-dynamic-sessions/.release-it.json b/libs/langchain-azure-dynamic-sessions/.release-it.json deleted file mode 100644 index 522ee6abf705..000000000000 --- a/libs/langchain-azure-dynamic-sessions/.release-it.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "github": { - "release": true, - "autoGenerate": true, - "tokenRef": "GITHUB_TOKEN_RELEASE" - }, - "npm": { - "versionArgs": ["--workspaces-update=false"] - } -} diff --git a/libs/langchain-azure-dynamic-sessions/LICENSE b/libs/langchain-azure-dynamic-sessions/LICENSE deleted file mode 100644 index 8cd8f501eb49..000000000000 --- a/libs/langchain-azure-dynamic-sessions/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2023 LangChain - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/libs/langchain-azure-dynamic-sessions/README.md b/libs/langchain-azure-dynamic-sessions/README.md index de8c15bbba0b..6d28825619da 100644 --- a/libs/langchain-azure-dynamic-sessions/README.md +++ b/libs/langchain-azure-dynamic-sessions/README.md @@ -1,60 +1,3 @@ -# @langchain/azure-dynamic-sessions +# @langchain/azure-dynamic-sessions -This package contains the [Azure Container Apps dynamic sessions](https://learn.microsoft.com/azure/container-apps/sessions) tool integration. - -Learn more about how to use this tool in the [LangChain documentation](https://js.langchain.com/docs/integrations/tools/azure_dynamic_sessions). - -## Installation - -```bash npm2yarn -npm install @langchain/azure-dynamic-sessions @langchain/core -``` - -This package, along with the main LangChain package, depends on [`@langchain/core`](https://npmjs.com/package/@langchain/core/). -If you are using this package with other LangChain packages, you should make sure that all of the packages depend on the same instance of @langchain/core. -You can do so by adding appropriate fields to your project's `package.json` like this: - -```json -{ - "name": "your-project", - "version": "0.0.0", - "dependencies": { - "@langchain/azure-openai": "^0.0.4", - "@langchain/core": "^0.3.0" - }, - "resolutions": { - "@langchain/core": "^0.3.0" - }, - "overrides": { - "@langchain/core": "^0.3.0" - }, - "pnpm": { - "overrides": { - "@langchain/core": "^0.3.0" - } - } -} -``` - -The field you need depends on the package manager you're using, but we recommend adding a field for the common `yarn`, `npm`, and `pnpm` to maximize compatibility. - -## Tool usage - -```typescript -import { SessionsPythonREPLTool } from "@langchain/azure-dynamic-sessions"; - -const tool = new SessionsPythonREPLTool({ - poolManagementEndpoint: - process.env.AZURE_CONTAINER_APP_SESSION_POOL_MANAGEMENT_ENDPOINT || "", -}); - -const result = await tool.invoke("print('Hello, World!')\n1+2"); - -console.log(result); - -// { -// stdout: "Hello, World!\n", -// stderr: "", -// result: 3, -// } -``` +This package has moved and its code is now available in the dedicated [langchain-azure-js repository](https://github.com/langchain-ai/langchain-azure-js/tree/migrate-packages/libs/azure-dynamic-sessions). diff --git a/libs/langchain-azure-dynamic-sessions/jest.config.cjs b/libs/langchain-azure-dynamic-sessions/jest.config.cjs deleted file mode 100644 index 994826496bc5..000000000000 --- a/libs/langchain-azure-dynamic-sessions/jest.config.cjs +++ /dev/null @@ -1,21 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: "ts-jest/presets/default-esm", - testEnvironment: "./jest.env.cjs", - modulePathIgnorePatterns: ["dist/", "docs/"], - moduleNameMapper: { - "^(\\.{1,2}/.*)\\.js$": "$1", - }, - transform: { - "^.+\\.tsx?$": ["@swc/jest"], - }, - transformIgnorePatterns: [ - "/node_modules/", - "\\.pnp\\.[^\\/]+$", - "./scripts/jest-setup-after-env.js", - ], - setupFiles: ["dotenv/config"], - testTimeout: 20_000, - passWithNoTests: true, - collectCoverageFrom: ["src/**/*.ts"], -}; diff --git a/libs/langchain-azure-dynamic-sessions/jest.env.cjs b/libs/langchain-azure-dynamic-sessions/jest.env.cjs deleted file mode 100644 index 2ccedccb8672..000000000000 --- a/libs/langchain-azure-dynamic-sessions/jest.env.cjs +++ /dev/null @@ -1,12 +0,0 @@ -const { TestEnvironment } = require("jest-environment-node"); - -class AdjustedTestEnvironmentToSupportFloat32Array extends TestEnvironment { - constructor(config, context) { - // Make `instanceof Float32Array` return true in tests - // to avoid https://github.com/xenova/transformers.js/issues/57 and https://github.com/jestjs/jest/issues/2549 - super(config, context); - this.global.Float32Array = Float32Array; - } -} - -module.exports = AdjustedTestEnvironmentToSupportFloat32Array; diff --git a/libs/langchain-azure-dynamic-sessions/langchain.config.js b/libs/langchain-azure-dynamic-sessions/langchain.config.js deleted file mode 100644 index 46b1a2b31264..000000000000 --- a/libs/langchain-azure-dynamic-sessions/langchain.config.js +++ /dev/null @@ -1,22 +0,0 @@ -import { resolve, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -/** - * @param {string} relativePath - * @returns {string} - */ -function abs(relativePath) { - return resolve(dirname(fileURLToPath(import.meta.url)), relativePath); -} - -export const config = { - internals: [/node\:/, /@langchain\/core\//], - entrypoints: { - index: "index", - }, - requiresOptionalDependency: [], - tsConfigPath: resolve("./tsconfig.json"), - cjsSource: "./dist-cjs", - cjsDestination: "./dist", - abs, -}; diff --git a/libs/langchain-azure-dynamic-sessions/package.json b/libs/langchain-azure-dynamic-sessions/package.json deleted file mode 100644 index 8c561880447e..000000000000 --- a/libs/langchain-azure-dynamic-sessions/package.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "name": "@langchain/azure-dynamic-sessions", - "version": "0.2.0", - "description": "Sample integration for LangChain.js", - "type": "module", - "engines": { - "node": ">=18" - }, - "main": "./index.js", - "types": "./index.d.ts", - "repository": { - "type": "git", - "url": "git@github.com:langchain-ai/langchainjs.git" - }, - "homepage": "https://github.com/langchain-ai/langchainjs/tree/main/libs/langchain-INTEGRATION_NAME/", - "scripts": { - "build": "yarn turbo:command build:internal --filter=@langchain/azure-dynamic-sessions", - "build:internal": "yarn lc_build --create-entrypoints --pre --tree-shaking", - "lint:eslint": "NODE_OPTIONS=--max-old-space-size=4096 eslint --cache --ext .ts,.js src/", - "lint:dpdm": "dpdm --skip-dynamic-imports circular --exit-code circular:1 --no-warning --no-tree src/*.ts src/**/*.ts", - "lint": "yarn lint:eslint && yarn lint:dpdm", - "lint:fix": "yarn lint:eslint --fix && yarn lint:dpdm", - "clean": "rm -rf .turbo dist/", - "prepack": "yarn build", - "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%", - "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts", - "test:single": "NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000", - "test:int": "NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%", - "format": "prettier --config .prettierrc --write \"src\"", - "format:check": "prettier --config .prettierrc --check \"src\"" - }, - "author": "LangChain", - "license": "MIT", - "dependencies": { - "@azure/identity": "^4.2.1", - "uuid": "^10.0.0" - }, - "peerDependencies": { - "@langchain/core": ">=0.2.21 <0.4.0" - }, - "devDependencies": { - "@jest/globals": "^29.5.0", - "@langchain/core": "workspace:*", - "@langchain/scripts": ">=0.1.0 <0.2.0", - "@swc/core": "^1.3.90", - "@swc/jest": "^0.2.29", - "@tsconfig/recommended": "^1.0.3", - "@types/uuid": "^9", - "@typescript-eslint/eslint-plugin": "^6.12.0", - "@typescript-eslint/parser": "^6.12.0", - "dotenv": "^16.4.5", - "dpdm": "^3.14.0", - "eslint": "^8.33.0", - "eslint-config-airbnb-base": "^15.0.0", - "eslint-config-prettier": "^8.6.0", - "eslint-plugin-import": "^2.27.5", - "eslint-plugin-no-instanceof": "^1.0.1", - "eslint-plugin-prettier": "^4.2.1", - "jest": "^29.5.0", - "jest-environment-node": "^29.6.4", - "prettier": "^2.8.3", - "release-it": "^18.1.2", - "rollup": "^4.5.2", - "ts-jest": "^29.1.0", - "typescript": "~5.8.3" - }, - "publishConfig": { - "access": "public" - }, - "exports": { - ".": { - "types": { - "import": "./index.d.ts", - "require": "./index.d.cts", - "default": "./index.d.ts" - }, - "import": "./index.js", - "require": "./index.cjs" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist/", - "index.cjs", - "index.js", - "index.d.ts", - "index.d.cts" - ] -} diff --git a/libs/langchain-azure-dynamic-sessions/scripts/jest-setup-after-env.js b/libs/langchain-azure-dynamic-sessions/scripts/jest-setup-after-env.js deleted file mode 100644 index 7323083d0ea5..000000000000 --- a/libs/langchain-azure-dynamic-sessions/scripts/jest-setup-after-env.js +++ /dev/null @@ -1,9 +0,0 @@ -import { awaitAllCallbacks } from "@langchain/core/callbacks/promises"; -import { afterAll, jest } from "@jest/globals"; - -afterAll(awaitAllCallbacks); - -// Allow console.log to be disabled in tests -if (process.env.DISABLE_CONSOLE_LOGS === "true") { - console.log = jest.fn(); -} diff --git a/libs/langchain-azure-dynamic-sessions/src/index.ts b/libs/langchain-azure-dynamic-sessions/src/index.ts deleted file mode 100644 index b11290199593..000000000000 --- a/libs/langchain-azure-dynamic-sessions/src/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "./tools.js"; diff --git a/libs/langchain-azure-dynamic-sessions/src/tests/tools.int.test.ts b/libs/langchain-azure-dynamic-sessions/src/tests/tools.int.test.ts deleted file mode 100644 index 3ca451d605ea..000000000000 --- a/libs/langchain-azure-dynamic-sessions/src/tests/tools.int.test.ts +++ /dev/null @@ -1,46 +0,0 @@ -/* eslint-disable no-process-env */ -import { test } from "@jest/globals"; -import { config } from "dotenv"; -import { SessionsPythonREPLTool } from "../tools.js"; - -config(); - -/* - * To run these tests, you need have an Azure Container Apps dynamic session - * instance running. - * See the following link for more information: - * https://learn.microsoft.com/azure/container-apps/sessions - * - * Once you have the instance running, you need to set the following environment - * variable before running the test: - * - AZURE_CONTAINER_APP_SESSION_POOL_MANAGEMENT_ENDPOINT - */ - -test("SessionsPythonREPLTool end-to-end test", async () => { - const tool = new SessionsPythonREPLTool(); - const result = await tool.invoke("print('Hello, World!')\n1+1"); - expect(JSON.parse(result)).toStrictEqual({ - stdout: "Hello, World!\n", - stderr: "", - result: 2, - }); -}); - -test("SessionsPythonREPLTool upload file end-to-end test", async () => { - const tool = new SessionsPythonREPLTool(); - const result = await tool.uploadFile({ - data: new Blob(["hello world!"], { type: "application/octet-stream" }), - remoteFilename: "test.txt", - }); - expect(result.filename).toBe("test.txt"); - expect(result.size).toBe(12); - - const downloadBlob = await tool.downloadFile({ - remoteFilename: "test.txt", - }); - const downloadText = await downloadBlob.text(); - expect(downloadText).toBe("hello world!"); - - const listResult = await tool.listFiles(); - expect(listResult.length).toBe(1); -}); diff --git a/libs/langchain-azure-dynamic-sessions/src/tests/tools.test.ts b/libs/langchain-azure-dynamic-sessions/src/tests/tools.test.ts deleted file mode 100644 index 26dc80a48d7f..000000000000 --- a/libs/langchain-azure-dynamic-sessions/src/tests/tools.test.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { test, jest, describe, beforeEach } from "@jest/globals"; -import { DefaultAzureCredential } from "@azure/identity"; -import { SessionsPythonREPLTool } from "../index.js"; - -describe("SessionsPythonREPLTool", () => { - describe("Default access token provider", () => { - let defaultAzureADTokenProvider: () => Promise; - let getTokenMock: jest.SpiedFunction; - beforeEach(() => { - const tool = new SessionsPythonREPLTool({ - poolManagementEndpoint: "https://poolmanagement.com", - sessionId: "session-id", - }); - defaultAzureADTokenProvider = tool.azureADTokenProvider; - getTokenMock = jest.spyOn(DefaultAzureCredential.prototype, "getToken"); - getTokenMock.mockClear(); - }); - - afterEach(() => { - jest.restoreAllMocks(); - jest.useRealTimers(); - }); - - test("Should use cached token when not expiring", async () => { - jest.useFakeTimers(); - jest.setSystemTime(new Date("2024-01-01 10:00:00")); - getTokenMock.mockImplementationOnce(async () => ({ - token: "test-token", - expiresOnTimestamp: new Date("2024-01-01 11:00:00").getTime(), - })); - - let token = await defaultAzureADTokenProvider(); - expect(token).toBe("test-token"); - expect(getTokenMock).toHaveBeenCalledTimes(1); - expect(getTokenMock.mock.calls[0][0]).toEqual([ - "https://acasessions.io/.default", - ]); - - getTokenMock.mockImplementationOnce(async () => ({ - token: "test-token2", - expiresOnTimestamp: new Date("2024-01-01 11:00:00").getTime(), - })); - - token = await defaultAzureADTokenProvider(); - expect(token).toBe("test-token"); - expect(getTokenMock).toHaveBeenCalledTimes(1); - }); - - test("Should refresh token when expired", async () => { - jest.useFakeTimers(); - jest.setSystemTime(new Date("2024-01-01 10:00:00")); - getTokenMock.mockImplementationOnce(async () => ({ - token: "test-token1", - expiresOnTimestamp: new Date("2024-01-01 10:30:00").getTime(), - })); - - let token = await defaultAzureADTokenProvider(); - expect(token).toBe("test-token1"); - expect(getTokenMock).toHaveBeenCalledTimes(1); - - jest.setSystemTime(new Date("2024-01-01 10:31:00")); - getTokenMock.mockImplementationOnce(async () => ({ - token: "test-token2", - expiresOnTimestamp: new Date("2024-01-01 11:00:00").getTime(), - })); - - token = await defaultAzureADTokenProvider(); - expect(token).toBe("test-token2"); - expect(getTokenMock).toHaveBeenCalledTimes(2); - }); - }); - - describe("Invoke with Python code", () => { - test("Should return correct output", async () => { - const tool = new SessionsPythonREPLTool({ - poolManagementEndpoint: - "https://acasessions.io/subscriptions/subscription-id/resourceGroups/resource-group/sessionPools/session-pool/", - sessionId: "session-id", - }); - - const getTokenMock = jest.spyOn( - DefaultAzureCredential.prototype, - "getToken" - ); - getTokenMock.mockResolvedValue({ - token: "test-token", - expiresOnTimestamp: new Date().getTime() + 1000 * 60 * 60, - }); - - const fetchMock = jest.spyOn(global, "fetch"); - fetchMock.mockResolvedValue({ - ok: true, - json: async () => ({ - $id: "1", - properties: { - $id: "2", - status: "Success", - stdout: "hello\n", - stderr: "", - result: 2, - executionTimeInMilliseconds: 35, - }, - }), - } as Response); - - const output = await tool.invoke("print('hello')\n1+1"); - expect(JSON.parse(output)).toStrictEqual({ - stdout: "hello\n", - stderr: "", - result: 2, - }); - expect(fetchMock).toHaveBeenCalledTimes(1); - expect(fetchMock).toHaveBeenCalledWith( - "https://acasessions.io/subscriptions/subscription-id/resourceGroups/resource-group/sessionPools/session-pool/code/execute?identifier=session-id&api-version=2024-02-02-preview", - { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: "Bearer test-token", - "User-Agent": expect.any(String), - }, - body: JSON.stringify({ - properties: { - codeInputType: "inline", - executionType: "synchronous", - code: "print('hello')\n1+1", - }, - }), - } - ); - }); - }); -}); diff --git a/libs/langchain-azure-dynamic-sessions/src/tools.ts b/libs/langchain-azure-dynamic-sessions/src/tools.ts deleted file mode 100644 index eb94c5664cb7..000000000000 --- a/libs/langchain-azure-dynamic-sessions/src/tools.ts +++ /dev/null @@ -1,238 +0,0 @@ -import { Tool } from "@langchain/core/tools"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - DefaultAzureCredential, - getBearerTokenProvider, -} from "@azure/identity"; -import { v4 as uuidv4 } from "uuid"; - -const userAgentPrefix = "langchainjs-azure-dynamic-sessions"; - -let userAgent = ""; -async function getUserAgentSuffix(): Promise { - if (!userAgent) { - userAgent = `${userAgentPrefix} (Language=JavaScript; node.js/${process.version}; ${process.platform}; ${process.arch})`; - } - return userAgent; -} - -export interface SessionsPythonREPLToolParams { - /** - * The endpoint of the pool management service. - */ - poolManagementEndpoint: string; - - /** - * The session ID. If not provided, a new session ID will be generated. - */ - sessionId?: string; - - /** - * A function that returns the access token to be used for authentication. - * If not provided, a default implementation that uses the DefaultAzureCredential - * will be used. - * - * @returns The access token to be used for authentication. - */ - azureADTokenProvider?: () => Promise; -} - -export interface RemoteFile { - /** - * The filename of the file. - */ - filename: string; - - /** - * The size of the file in bytes. - */ - size: number; - - /** - * The last modified time of the file. - */ - last_modified_time: string; - - /** - * The identifier of the file. - */ - $id: string; -} - -export class SessionsPythonREPLTool extends Tool { - static lc_name() { - return "SessionsPythonREPLTool"; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - poolManagementEndpoint: - "AZURE_CONTAINER_APP_SESSION_POOL_MANAGEMENT_ENDPOINT", - }; - } - - get lc_aliases(): Record { - return { - poolManagementEndpoint: "pool_management_endpoint", - sessionId: "session_id", - }; - } - - name = "sessions-python-repl-tool"; - - description = - "A Python shell. Use this to execute python commands " + - "when you need to perform calculations or computations. " + - "Input should be a valid python command. " + - "Returns the result, stdout, and stderr. "; - - poolManagementEndpoint: string; - - sessionId: string; - - azureADTokenProvider: () => Promise; - - constructor(params?: SessionsPythonREPLToolParams) { - super(); - - this.poolManagementEndpoint = - params?.poolManagementEndpoint ?? - getEnvironmentVariable( - "AZURE_CONTAINER_APP_SESSION_POOL_MANAGEMENT_ENDPOINT" - ) ?? - ""; - - if (!this.poolManagementEndpoint) { - throw new Error("poolManagementEndpoint must be defined."); - } - - this.sessionId = params?.sessionId ?? uuidv4(); - this.azureADTokenProvider = - params?.azureADTokenProvider ?? defaultAzureADTokenProvider(); - } - - _buildUrl(path: string) { - let url = `${this.poolManagementEndpoint}${ - this.poolManagementEndpoint.endsWith("/") ? "" : "/" - }${path}`; - url += url.includes("?") ? "&" : "?"; - url += `identifier=${encodeURIComponent(this.sessionId)}`; - url += `&api-version=2024-02-02-preview`; - return url; - } - - async _call(pythonCode: string) { - const token = await this.azureADTokenProvider(); - const apiUrl = this._buildUrl("code/execute"); - const headers = { - "Content-Type": "application/json", - Authorization: `Bearer ${token}`, - "User-Agent": await getUserAgentSuffix(), - }; - const body = JSON.stringify({ - properties: { - codeInputType: "inline", - executionType: "synchronous", - code: pythonCode, - }, - }); - - const response = await fetch(apiUrl, { - method: "POST", - headers, - body, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const { properties } = await response.json(); - const output = { - result: properties.result, - stdout: properties.stdout, - stderr: properties.stderr, - }; - return JSON.stringify(output, null, 2); - } - - async uploadFile(params: { - data: Blob; - remoteFilename: string; - }): Promise { - const token = await this.azureADTokenProvider(); - const apiUrl = this._buildUrl("files/upload"); - const headers = { - Authorization: `Bearer ${token}`, - "User-Agent": await getUserAgentSuffix(), - }; - const formData = new FormData(); - formData.append("file", params.data, params.remoteFilename); - - const response = await fetch(apiUrl, { - method: "POST", - headers, - body: formData, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const json = await response.json(); - return json.value[0].properties as RemoteFile; - } - - async downloadFile(params: { remoteFilename: string }): Promise { - const token = await this.azureADTokenProvider(); - const apiUrl = this._buildUrl(`files/content/${params.remoteFilename}`); - const headers = { - Authorization: `Bearer ${token}`, - "User-Agent": await getUserAgentSuffix(), - }; - - const response = await fetch(apiUrl, { - method: "GET", - headers, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - return await response.blob(); - } - - async listFiles(): Promise { - const token = await this.azureADTokenProvider(); - const apiUrl = this._buildUrl("files"); - const headers = { - Authorization: `Bearer ${token}`, - "User-Agent": await getUserAgentSuffix(), - }; - - const response = await fetch(apiUrl, { - method: "GET", - headers, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const json = await response.json(); - const list = json.value.map( - (x: { properties: RemoteFile }) => x.properties - ); - return list as RemoteFile[]; - } -} - -function defaultAzureADTokenProvider() { - return getBearerTokenProvider( - new DefaultAzureCredential(), - "https://acasessions.io/.default" - ); -} diff --git a/libs/langchain-azure-dynamic-sessions/tsconfig.cjs.json b/libs/langchain-azure-dynamic-sessions/tsconfig.cjs.json deleted file mode 100644 index a67a84ea490e..000000000000 --- a/libs/langchain-azure-dynamic-sessions/tsconfig.cjs.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "./tsconfig.json", - "compilerOptions": { - "module": "commonjs", - "moduleResolution": "node10", - "declaration": false - }, - "exclude": ["node_modules", "dist", "docs", "**/tests"] -} diff --git a/libs/langchain-azure-dynamic-sessions/tsconfig.json b/libs/langchain-azure-dynamic-sessions/tsconfig.json deleted file mode 100644 index 5bf2c7b56562..000000000000 --- a/libs/langchain-azure-dynamic-sessions/tsconfig.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "extends": "@tsconfig/recommended", - "compilerOptions": { - "outDir": "../dist", - "rootDir": "./src", - "target": "ES2021", - "lib": ["ES2021", "ES2022.Object", "DOM"], - "module": "ES2020", - "moduleResolution": "bundler", - "esModuleInterop": true, - "declaration": true, - "noImplicitReturns": true, - "noFallthroughCasesInSwitch": true, - "noUnusedLocals": true, - "noUnusedParameters": true, - "useDefineForClassFields": true, - "strictPropertyInitialization": false, - "allowJs": true, - "strict": true - }, - "include": ["src/**/*"], - "exclude": ["node_modules", "dist", "docs"] -} diff --git a/libs/langchain-azure-openai/.eslintrc.cjs b/libs/langchain-azure-openai/.eslintrc.cjs deleted file mode 100644 index 0493060c302f..000000000000 --- a/libs/langchain-azure-openai/.eslintrc.cjs +++ /dev/null @@ -1,78 +0,0 @@ -module.exports = { - extends: [ - "airbnb-base", - "eslint:recommended", - "prettier", - "plugin:@typescript-eslint/recommended", - ], - parserOptions: { - ecmaVersion: 12, - parser: "@typescript-eslint/parser", - project: "./tsconfig.json", - sourceType: "module", - }, - plugins: ["@typescript-eslint", "no-instanceof", "eslint-plugin-jest"], - ignorePatterns: [ - "src/utils/@cfworker", - "src/utils/fast-json-patch", - "src/utils/js-sha1", - ".eslintrc.cjs", - "scripts", - "node_modules", - "dist", - "dist-cjs", - "*.js", - "*.cjs", - "*.d.ts", - ], - rules: { - "no-process-env": 2, - "no-instanceof/no-instanceof": 2, - "@typescript-eslint/explicit-module-boundary-types": 0, - "@typescript-eslint/no-empty-function": 0, - "@typescript-eslint/no-shadow": 0, - "@typescript-eslint/no-empty-interface": 0, - "@typescript-eslint/no-use-before-define": ["error", "nofunc"], - "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], - "@typescript-eslint/no-floating-promises": "error", - "@typescript-eslint/no-misused-promises": "error", - camelcase: 0, - "class-methods-use-this": 0, - "import/extensions": [2, "ignorePackages"], - "import/no-extraneous-dependencies": [ - "error", - { devDependencies: ["**/*.test.ts"] }, - ], - "import/no-unresolved": 0, - "import/prefer-default-export": 0, - "keyword-spacing": "error", - "max-classes-per-file": 0, - "max-len": 0, - "no-await-in-loop": 0, - "no-bitwise": 0, - "no-console": 0, - "no-restricted-syntax": 0, - "no-shadow": 0, - "no-continue": 0, - "no-void": 0, - "no-underscore-dangle": 0, - "no-use-before-define": 0, - "no-useless-constructor": 0, - "no-return-await": 0, - "consistent-return": 0, - "no-else-return": 0, - "func-names": 0, - "no-lonely-if": 0, - "prefer-rest-params": 0, - "new-cap": ["error", { properties: false, capIsNew: false }], - 'jest/no-focused-tests': 'error', - }, - overrides: [ - { - files: ['**/*.test.ts'], - rules: { - '@typescript-eslint/no-unused-vars': 'off' - } - } - ] -}; diff --git a/libs/langchain-azure-openai/.gitignore b/libs/langchain-azure-openai/.gitignore deleted file mode 100644 index c10034e2f1be..000000000000 --- a/libs/langchain-azure-openai/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -index.cjs -index.js -index.d.ts -index.d.cts -node_modules -dist -.yarn diff --git a/libs/langchain-azure-openai/.prettierrc b/libs/langchain-azure-openai/.prettierrc deleted file mode 100644 index 74c13fdf2e36..000000000000 --- a/libs/langchain-azure-openai/.prettierrc +++ /dev/null @@ -1,19 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/prettierrc", - "printWidth": 80, - "tabWidth": 2, - "useTabs": false, - "semi": true, - "singleQuote": false, - "quoteProps": "as-needed", - "jsxSingleQuote": false, - "trailingComma": "es5", - "bracketSpacing": true, - "arrowParens": "always", - "requirePragma": false, - "insertPragma": false, - "proseWrap": "preserve", - "htmlWhitespaceSensitivity": "css", - "vueIndentScriptAndStyle": false, - "endOfLine": "lf" -} \ No newline at end of file diff --git a/libs/langchain-azure-openai/.release-it.json b/libs/langchain-azure-openai/.release-it.json deleted file mode 100644 index 06850ca85be1..000000000000 --- a/libs/langchain-azure-openai/.release-it.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "github": { - "release": true, - "autoGenerate": true, - "tokenRef": "GITHUB_TOKEN_RELEASE" - }, - "npm": { - "versionArgs": [ - "--workspaces-update=false" - ] - } -} diff --git a/libs/langchain-azure-openai/LICENSE b/libs/langchain-azure-openai/LICENSE deleted file mode 100644 index d5c9d8189aa9..000000000000 --- a/libs/langchain-azure-openai/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) Harrison Chase - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/libs/langchain-azure-openai/README.md b/libs/langchain-azure-openai/README.md index d61cd9f17be3..8503581340ab 100644 --- a/libs/langchain-azure-openai/README.md +++ b/libs/langchain-azure-openai/README.md @@ -4,173 +4,4 @@ # @langchain/azure-openai -This package contains the Azure SDK for OpenAI LangChain.js integrations. - -It provides Azure OpenAI support through the [Azure SDK for OpenAI](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/openai/openai) library. - -## Installation - -```bash npm2yarn -npm install @langchain/azure-openai @langchain/core -``` - -This package, along with the main LangChain package, depends on [`@langchain/core`](https://npmjs.com/package/@langchain/core/). -If you are using this package with other LangChain packages, you should make sure that all of the packages depend on the same instance of @langchain/core. -You can do so by adding appropriate fields to your project's `package.json` like this: - -```json -{ - "name": "your-project", - "version": "0.0.0", - "dependencies": { - "@langchain/azure-openai": "^0.0.4", - "@langchain/core": "^0.3.0" - }, - "resolutions": { - "@langchain/core": "^0.3.0" - }, - "overrides": { - "@langchain/core": "^0.3.0" - }, - "pnpm": { - "overrides": { - "@langchain/core": "^0.3.0" - } - } -} -``` - -The field you need depends on the package manager you're using, but we recommend adding a field for the common `yarn`, `npm`, and `pnpm` to maximize compatibility. - -## Chat Models - -This package contains the `AzureChatOpenAI` class, which is the recommended way to interface with deployed models on Azure OpenAI. - -To use, install the requirements, and configure your environment. - -```bash -export AZURE_OPENAI_API_ENDPOINT= -export AZURE_OPENAI_API_KEY= -export AZURE_OPENAI_API_DEPLOYMENT_NAME= -``` - -Then initialize the model and make the calls: - -```typescript -import { AzureChatOpenAI } from "@langchain/azure-openai"; - -const model = new AzureChatOpenAI({ - // Note that the following are optional, and will default to the values below - // if not provided. - azureOpenAIEndpoint: process.env.AZURE_OPENAI_API_ENDPOINT, - azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY, - azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME, -}); -const response = await model.invoke(new HumanMessage("Hello world!")); -``` - -### Streaming - -```typescript -import { AzureChatOpenAI } from "@langchain/azure-openai"; - -const model = new AzureChatOpenAI({ - // Note that the following are optional, and will default to the values below - // if not provided. - azureOpenAIEndpoint: process.env.AZURE_OPENAI_API_ENDPOINT, - azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY, - azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME, -}); -const response = await model.stream(new HumanMessage("Hello world!")); -``` - -## Embeddings - -This package also supports embeddings with Azure OpenAI. - -```typescript -import { AzureOpenAIEmbeddings } from "@langchain/azure-openai"; - -const embeddings = new AzureOpenAIEmbeddings({ - // Note that the following are optional, and will default to the values below - // if not provided. - azureOpenAIEndpoint: process.env.AZURE_OPENAI_API_ENDPOINT, - azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY, - azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME, -}); -const res = await embeddings.embedQuery("Hello world"); -``` - -## Using Azure managed identity - -If you're using [Azure Managed Identity](https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity), you can also pass the credentials directly to the constructor: - -```typescript -import { DefaultAzureCredential } from "@azure/identity"; -import { AzureOpenAI } from "@langchain/azure-openai"; - -const credentials = new DefaultAzureCredential(); - -const model = new AzureOpenAI({ - credentials, - azureOpenAIEndpoint: process.env.AZURE_OPENAI_API_ENDPOINT, - azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME, -}); -``` - -## Compatibility with OpenAI API - -This library is provides compatibility with the OpenAI API. You can use an API key from OpenAI's developer portal like in the example below: - -```typescript -import { AzureOpenAI, OpenAIKeyCredential } from "@langchain/azure-openai"; - -const model = new AzureOpenAI({ - modelName: "gpt-3.5-turbo", - credentials: new OpenAIKeyCredential(""), -}); -``` - -## Development - -To develop the Azure OpenAI package, you'll need to follow these instructions: - -### Install dependencies - -```bash -yarn install -``` - -### Build the package - -```bash -yarn build -``` - -Or from the repo root: - -```bash -yarn build --filter=@langchain/azure-openai -``` - -### Run tests - -Test files should live within a `tests/` file in the `src/` folder. Unit tests should end in `.test.ts` and integration tests should -end in `.int.test.ts`: - -```bash -$ yarn test -$ yarn test:int -``` - -### Lint & Format - -Run the linter & formatter to ensure your code is up to standard: - -```bash -yarn lint && yarn format -``` - -### Adding new entrypoints - -If you add a new file to be exported, either import & re-export from `src/index.ts`, or add it to `scripts/create-entrypoints.js` and run `yarn build` to generate the new entrypoint. +This package has moved and its code is now available in the dedicated [langchain-azure-js repository](https://github.com/langchain-ai/langchain-azure-js/tree/migrate-packages/libs/azure-openai). diff --git a/libs/langchain-azure-openai/babel.config.cjs b/libs/langchain-azure-openai/babel.config.cjs deleted file mode 100644 index 7617b0c33f70..000000000000 --- a/libs/langchain-azure-openai/babel.config.cjs +++ /dev/null @@ -1,4 +0,0 @@ -// babel.config.js -module.exports = { - presets: [["@babel/preset-env", { targets: { node: true } }]], -}; diff --git a/libs/langchain-azure-openai/jest.config.cjs b/libs/langchain-azure-openai/jest.config.cjs deleted file mode 100644 index 1673b585bb54..000000000000 --- a/libs/langchain-azure-openai/jest.config.cjs +++ /dev/null @@ -1,21 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: "ts-jest/presets/default-esm", - testEnvironment: "./jest.env.cjs", - modulePathIgnorePatterns: ["dist/", "docs/"], - moduleNameMapper: { - "^(\\.{1,2}/.*)\\.js$": "$1", - }, - transform: { - '^.+\\.tsx?$': ['@swc/jest'], - }, - transformIgnorePatterns: [ - "/node_modules/", - "\\.pnp\\.[^\\/]+$", - "./scripts/jest-setup-after-env.js", - ], - setupFiles: ["dotenv/config"], - setupFilesAfterEnv: ["./scripts/jest-setup-after-env.js"], - testTimeout: 20_000, - passWithNoTests: true -}; diff --git a/libs/langchain-azure-openai/jest.env.cjs b/libs/langchain-azure-openai/jest.env.cjs deleted file mode 100644 index 2ccedccb8672..000000000000 --- a/libs/langchain-azure-openai/jest.env.cjs +++ /dev/null @@ -1,12 +0,0 @@ -const { TestEnvironment } = require("jest-environment-node"); - -class AdjustedTestEnvironmentToSupportFloat32Array extends TestEnvironment { - constructor(config, context) { - // Make `instanceof Float32Array` return true in tests - // to avoid https://github.com/xenova/transformers.js/issues/57 and https://github.com/jestjs/jest/issues/2549 - super(config, context); - this.global.Float32Array = Float32Array; - } -} - -module.exports = AdjustedTestEnvironmentToSupportFloat32Array; diff --git a/libs/langchain-azure-openai/langchain.config.js b/libs/langchain-azure-openai/langchain.config.js deleted file mode 100644 index 94df0a914545..000000000000 --- a/libs/langchain-azure-openai/langchain.config.js +++ /dev/null @@ -1,22 +0,0 @@ -import { resolve, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -/** - * @param {string} relativePath - * @returns {string} - */ -function abs(relativePath) { - return resolve(dirname(fileURLToPath(import.meta.url)), relativePath); -} - - -export const config = { - internals: [/node\:/, /@langchain\/core\//], - entrypoints: { - index: "index", - }, - tsConfigPath: resolve("./tsconfig.json"), - cjsSource: "./dist-cjs", - cjsDestination: "./dist", - abs, -} diff --git a/libs/langchain-azure-openai/package.json b/libs/langchain-azure-openai/package.json deleted file mode 100644 index 6eb4c900a4a6..000000000000 --- a/libs/langchain-azure-openai/package.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "name": "@langchain/azure-openai", - "version": "0.0.11", - "description": "Azure SDK for OpenAI integrations for LangChain.js", - "type": "module", - "engines": { - "node": ">=18" - }, - "main": "./index.js", - "types": "./index.d.ts", - "repository": { - "type": "git", - "url": "git@github.com:langchain-ai/langchainjs.git" - }, - "scripts": { - "build": "yarn turbo:command build:internal --filter=@langchain/azure-openai", - "build:internal": "yarn lc_build --create-entrypoints --pre --tree-shaking", - "lint:eslint": "NODE_OPTIONS=--max-old-space-size=4096 eslint --cache --ext .ts,.js src/", - "lint:dpdm": "dpdm --skip-dynamic-imports circular --exit-code circular:1 --no-warning --no-tree src/*.ts src/**/*.ts", - "lint": "yarn lint:eslint && yarn lint:dpdm", - "lint:fix": "yarn lint:eslint --fix && yarn lint:dpdm", - "clean": "rm -rf .turbo dist/", - "prepack": "yarn build", - "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%", - "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts", - "test:single": "NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000", - "test:int": "NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%", - "format": "prettier --config .prettierrc --write \"src\"", - "format:check": "prettier --config .prettierrc --check \"src\"" - }, - "author": "LangChain", - "license": "MIT", - "dependencies": { - "@azure/core-auth": "^1.5.0", - "@azure/openai": "1.0.0-beta.11", - "js-tiktoken": "^1.0.12", - "zod": "^3.22.3", - "zod-to-json-schema": "3.20.3" - }, - "peerDependencies": { - "@langchain/core": ">=0.2.21 <0.4.0" - }, - "devDependencies": { - "@azure/identity": "^4.2.1", - "@jest/globals": "^29.5.0", - "@langchain/core": "workspace:*", - "@langchain/scripts": ">=0.1.0 <0.2.0", - "@langchain/standard-tests": "0.0.0", - "@swc/core": "^1.3.90", - "@swc/jest": "^0.2.29", - "dpdm": "^3.14.0", - "eslint": "^8.33.0", - "eslint-config-airbnb-base": "^15.0.0", - "eslint-config-prettier": "^8.6.0", - "eslint-plugin-import": "^2.27.5", - "eslint-plugin-jest": "^27.6.0", - "eslint-plugin-no-instanceof": "^1.0.1", - "eslint-plugin-prettier": "^4.2.1", - "jest": "^29.5.0", - "jest-environment-node": "^29.6.4", - "prettier": "^2.8.3", - "release-it": "^18.1.2", - "rimraf": "^5.0.1", - "typescript": "~5.8.3" - }, - "publishConfig": { - "access": "public" - }, - "keywords": [ - "llm", - "ai", - "gpt3", - "chain", - "prompt", - "prompt engineering", - "chatgpt", - "machine learning", - "ml", - "openai", - "embeddings", - "vectorstores", - "azure", - "azure SDK" - ], - "exports": { - ".": { - "types": { - "import": "./index.d.ts", - "require": "./index.d.cts", - "default": "./index.d.ts" - }, - "import": "./index.js", - "require": "./index.cjs" - }, - "./package.json": "./package.json" - }, - "files": [ - "dist/", - "index.cjs", - "index.js", - "index.d.ts", - "index.d.cts" - ] -} diff --git a/libs/langchain-azure-openai/scripts/jest-setup-after-env.js b/libs/langchain-azure-openai/scripts/jest-setup-after-env.js deleted file mode 100644 index 7323083d0ea5..000000000000 --- a/libs/langchain-azure-openai/scripts/jest-setup-after-env.js +++ /dev/null @@ -1,9 +0,0 @@ -import { awaitAllCallbacks } from "@langchain/core/callbacks/promises"; -import { afterAll, jest } from "@jest/globals"; - -afterAll(awaitAllCallbacks); - -// Allow console.log to be disabled in tests -if (process.env.DISABLE_CONSOLE_LOGS === "true") { - console.log = jest.fn(); -} diff --git a/libs/langchain-azure-openai/src/chat_models.ts b/libs/langchain-azure-openai/src/chat_models.ts deleted file mode 100644 index e09d2cc15a3f..000000000000 --- a/libs/langchain-azure-openai/src/chat_models.ts +++ /dev/null @@ -1,701 +0,0 @@ -import { - type OpenAIClientOptions as AzureOpenAIClientOptions, - OpenAIClient as AzureOpenAIClient, - AzureExtensionsOptions, - ChatRequestMessage, - ChatResponseMessage, - AzureKeyCredential, - ChatCompletions, - EventStream, - ChatCompletionsToolDefinition, - ChatCompletionsNamedToolSelection, - ChatCompletionsResponseFormat, - OpenAIKeyCredential, -} from "@azure/openai"; -import { - BaseChatModel, - BaseChatModelParams, -} from "@langchain/core/language_models/chat_models"; -import { - BaseFunctionCallOptions, - FunctionCallOption, - FunctionDefinition, - TokenUsage, -} from "@langchain/core/language_models/base"; -import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { - AIMessage, - AIMessageChunk, - BaseMessage, - ChatMessage, - ChatMessageChunk, - FunctionMessageChunk, - HumanMessageChunk, - SystemMessageChunk, - ToolMessage, - ToolMessageChunk, -} from "@langchain/core/messages"; -import { - ChatGeneration, - ChatGenerationChunk, - ChatResult, -} from "@langchain/core/outputs"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - KeyCredential, - TokenCredential, - isTokenCredential, -} from "@azure/core-auth"; -import { NewTokenIndices } from "@langchain/core/callbacks/base"; -import { - AzureOpenAIInput, - OpenAIChatInput, - OpenAIChatCallOptions, -} from "./types.js"; -import { - FunctionDef, - formatFunctionDefinitions, -} from "./utils/openai-format-fndef.js"; -import { USER_AGENT_PREFIX } from "./constants.js"; - -function _convertDeltaToMessageChunk( - delta: ChatResponseMessage, - defaultRole?: string -) { - const role = delta.role ?? defaultRole; - const content = delta.content ?? ""; - let additional_kwargs; - if (delta.functionCall) { - additional_kwargs = { - function_call: delta.functionCall, - }; - } else if (delta.toolCalls) { - additional_kwargs = { - tool_calls: delta.toolCalls, - }; - } else { - additional_kwargs = {}; - } - if (role === "user") { - return new HumanMessageChunk({ content }); - } else if (role === "assistant") { - return new AIMessageChunk({ content, additional_kwargs }); - } else if (role === "system") { - return new SystemMessageChunk({ content }); - } else if (role === "function") { - return new FunctionMessageChunk({ - content, - additional_kwargs, - name: delta.role, - }); - } else if (role === "tool") { - return new ToolMessageChunk({ - content, - additional_kwargs, - tool_call_id: delta.toolCalls[0].id, - }); - } else { - return new ChatMessageChunk({ content, role }); - } -} - -function openAIResponseToChatMessage( - message: ChatResponseMessage -): BaseMessage { - switch (message.role) { - case "assistant": - return new AIMessage(message.content || "", { - function_call: message.functionCall, - tool_calls: message.toolCalls, - }); - default: - return new ChatMessage(message.content || "", message.role ?? "unknown"); - } -} - -interface OpenAILLMOutput { - tokenUsage: TokenUsage; -} - -function extractGenericMessageCustomRole(message: ChatMessage) { - if ( - message.role !== "system" && - message.role !== "assistant" && - message.role !== "user" && - message.role !== "function" && - message.role !== "tool" - ) { - console.warn(`Unknown message role: ${message.role}`); - } - - return message.role; -} - -export function messageToOpenAIRole(message: BaseMessage): string { - const type = message._getType(); - switch (type) { - case "system": - return "system"; - case "ai": - return "assistant"; - case "human": - return "user"; - case "function": - return "function"; - case "tool": - return "tool"; - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -export interface ChatOpenAICallOptions - extends OpenAIChatCallOptions, - BaseFunctionCallOptions { - tools?: ChatCompletionsToolDefinition[]; - tool_choice?: ChatCompletionsNamedToolSelection; - response_format?: ChatCompletionsResponseFormat; - seed?: number; -} - -/** @deprecated Import from "@langchain/openai" instead. */ -export class AzureChatOpenAI - extends BaseChatModel - implements OpenAIChatInput, AzureOpenAIInput -{ - static lc_name() { - return "AzureChatOpenAI"; - } - - get callKeys() { - return [ - ...super.callKeys, - "options", - "function_call", - "functions", - "tools", - "tool_choice", - "promptIndex", - "response_format", - "seed", - ]; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - openAIApiKey: "OPENAI_API_KEY", - azureOpenAIApiKey: "AZURE_OPENAI_API_KEY", - azureOpenAIEndpoint: "AZURE_OPENAI_API_ENDPOINT", - azureOpenAIApiDeploymentName: "AZURE_OPENAI_API_DEPLOYMENT_NAME", - }; - } - - get lc_aliases(): Record { - return { - openAIApiKey: "openai_api_key", - azureOpenAIApiKey: "azure_openai_api_key", - azureOpenAIEndpoint: "azure_openai_api_endpoint", - azureOpenAIApiDeploymentName: "azure_openai_api_deployment_name", - }; - } - - lc_serializable = true; - - azureExtensionOptions?: AzureExtensionsOptions | undefined; - - maxTokens?: number | undefined; - - temperature: number; - - topP: number; - - logitBias?: Record | undefined; - - user?: string | undefined; - - n: number; - - presencePenalty: number; - - frequencyPenalty: number; - - stop?: string[] | undefined; - - stopSequences?: string[] | undefined; - - streaming: boolean; - - model: string; - - modelKwargs?: OpenAIChatInput["modelKwargs"]; - - timeout?: number | undefined; - - azureOpenAIEndpoint?: string; - - azureOpenAIApiKey?: string; - - apiKey?: string; - - azureOpenAIApiDeploymentName?: string; - - private client: AzureOpenAIClient; - - constructor( - fields?: Partial & - Partial & - BaseChatModelParams & { - configuration?: AzureOpenAIClientOptions; - } - ) { - super(fields ?? {}); - - this.azureOpenAIEndpoint = - fields?.azureOpenAIEndpoint ?? - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT"); - - this.azureOpenAIApiDeploymentName = - (fields?.azureOpenAIEmbeddingsApiDeploymentName || - fields?.azureOpenAIApiDeploymentName) ?? - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME"); - - const openAiApiKey = - fields?.apiKey ?? - fields?.openAIApiKey ?? - getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.apiKey ?? - fields?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? - openAiApiKey; - this.apiKey = this.azureOpenAIApiKey; - - const azureCredential = - fields?.credentials ?? - (this.apiKey === openAiApiKey - ? new OpenAIKeyCredential(this.apiKey ?? "") - : new AzureKeyCredential(this.apiKey ?? "")); - - // eslint-disable-next-line no-instanceof/no-instanceof - const isOpenAIApiKey = azureCredential instanceof OpenAIKeyCredential; - - if (!this.apiKey && !fields?.credentials) { - throw new Error("Azure OpenAI API key not found"); - } - - if (!this.azureOpenAIEndpoint && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Endpoint not found"); - } - - if (!this.azureOpenAIApiDeploymentName && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Deployment name not found"); - } - - this.model = fields?.model ?? this.model; - this.modelKwargs = fields?.modelKwargs ?? {}; - this.timeout = fields?.timeout; - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.maxTokens = fields?.maxTokens; - this.n = fields?.n ?? this.n; - this.logitBias = fields?.logitBias; - this.stop = fields?.stopSequences ?? fields?.stop; - this.stopSequences = this.stop; - this.user = fields?.user; - this.azureExtensionOptions = fields?.azureExtensionOptions; - - this.streaming = fields?.streaming ?? false; - - const options = { - userAgentOptions: { userAgentPrefix: USER_AGENT_PREFIX }, - }; - - if (isOpenAIApiKey) { - this.client = new AzureOpenAIClient( - azureCredential as OpenAIKeyCredential - ); - } else if (isTokenCredential(azureCredential)) { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as TokenCredential, - options - ); - } else { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as KeyCredential, - options - ); - } - } - - private formatMessages(messages: BaseMessage[]): ChatRequestMessage[] { - return messages.map( - (message: BaseMessage) => - ({ - role: messageToOpenAIRole(message), - content: message.content, - name: message.name, - toolCalls: message.additional_kwargs.tool_calls, - functionCall: message.additional_kwargs.function_call, - toolCallId: (message as ToolMessage).tool_call_id, - } as ChatRequestMessage) - ); - } - - protected async _streamChatCompletionsWithRetry( - azureOpenAIMessages: ChatRequestMessage[], - options: this["ParsedCallOptions"] - ): Promise> { - return this.caller.call(async () => { - const deploymentName = this.azureOpenAIApiDeploymentName || this.model; - - const res = await this.client.streamChatCompletions( - deploymentName, - azureOpenAIMessages, - { - functions: options?.functions, - functionCall: options?.function_call, - maxTokens: this.maxTokens, - temperature: this.temperature, - topP: this.topP, - logitBias: this.logitBias, - user: this.user, - n: this.n, - stop: this.stopSequences, - presencePenalty: this.presencePenalty, - frequencyPenalty: this.frequencyPenalty, - azureExtensionOptions: this.azureExtensionOptions, - requestOptions: { - timeout: options?.timeout ?? this.timeout, - }, - abortSignal: options?.signal ?? undefined, - tools: options?.tools, - toolChoice: options?.tool_choice, - responseFormat: options?.response_format, - seed: options?.seed, - ...this.modelKwargs, - } - ); - return res; - }); - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const azureOpenAIMessages: ChatRequestMessage[] = - this.formatMessages(messages); - let defaultRole: string | undefined; - const streamIterable = await this._streamChatCompletionsWithRetry( - azureOpenAIMessages, - options - ); - - for await (const data of streamIterable) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - - const { delta } = choice; - if (!delta) { - continue; - } - const chunk = _convertDeltaToMessageChunk(delta, defaultRole); - defaultRole = delta.role ?? defaultRole; - const newTokenIndices = { - prompt: options.promptIndex ?? 0, - completion: choice.index ?? 0, - }; - if (typeof chunk.content !== "string") { - console.log( - "[WARNING]: Received non-string content from OpenAI. This is currently not supported." - ); - continue; - } - const generationChunk = new ChatGenerationChunk({ - message: chunk, - text: chunk.content, - generationInfo: newTokenIndices, - }); - yield generationChunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken( - generationChunk.text ?? "", - newTokenIndices, - undefined, - undefined, - undefined, - { chunk: generationChunk } - ); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const deploymentName = this.azureOpenAIApiDeploymentName || this.model; - const tokenUsage: TokenUsage = {}; - const azureOpenAIMessages: ChatRequestMessage[] = - this.formatMessages(messages); - - if (!this.streaming) { - const data = await this.caller.call(() => - this.client.getChatCompletions(deploymentName, azureOpenAIMessages, { - functions: options?.functions, - functionCall: options?.function_call, - maxTokens: this.maxTokens, - temperature: this.temperature, - topP: this.topP, - logitBias: this.logitBias, - user: this.user, - n: this.n, - stop: this.stopSequences, - presencePenalty: this.presencePenalty, - frequencyPenalty: this.frequencyPenalty, - azureExtensionOptions: this.azureExtensionOptions, - requestOptions: { - timeout: options?.timeout ?? this.timeout, - }, - abortSignal: options?.signal ?? undefined, - tools: options?.tools, - toolChoice: options?.tool_choice, - responseFormat: options?.response_format, - seed: options?.seed, - ...this.modelKwargs, - }) - ); - - const { completionTokens, promptTokens, totalTokens } = data?.usage ?? {}; - - if (completionTokens) { - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + completionTokens; - } - - if (promptTokens) { - tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; - } - - if (totalTokens) { - tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; - } - - const generations: ChatGeneration[] = []; - for (const part of data?.choices ?? []) { - const text = part.message?.content ?? ""; - const generation: ChatGeneration = { - text, - message: openAIResponseToChatMessage( - part.message ?? { - role: "assistant", - content: text, - toolCalls: [], - } - ), - }; - generation.generationInfo = { - ...(part.finishReason ? { finish_reason: part.finishReason } : {}), - }; - generations.push(generation); - } - return { - generations, - llmOutput: { tokenUsage }, - }; - } else { - const stream = this._streamResponseChunks(messages, options, runManager); - const finalChunks: Record = {}; - for await (const chunk of stream) { - const index = - (chunk.generationInfo as NewTokenIndices)?.completion ?? 0; - if (finalChunks[index] === undefined) { - finalChunks[index] = chunk; - } else { - finalChunks[index] = finalChunks[index].concat(chunk); - } - } - - const generations = Object.entries(finalChunks) - .sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10)) - .map(([_, value]) => value); - - const promptTokenUsage = await this.getEstimatedTokenCountFromPrompt( - messages, - options?.functions, - options?.function_call - ); - - const completionTokenUsage = await this.getNumTokensFromGenerations( - generations - ); - - tokenUsage.promptTokens = promptTokenUsage; - tokenUsage.completionTokens = completionTokenUsage; - tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage; - return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } }; - } - } - - /** - * Estimate the number of tokens an array of generations have used. - */ - private async getNumTokensFromGenerations(generations: ChatGeneration[]) { - const generationUsages = await Promise.all( - generations.map(async (generation) => { - if (generation.message.additional_kwargs?.function_call) { - return (await this.getNumTokensFromMessages([generation.message])) - .countPerMessage[0]; - } else { - return await this.getNumTokens(generation.message.content); - } - }) - ); - - return generationUsages.reduce((a, b) => a + b, 0); - } - - _llmType() { - return "azure-openai"; - } - - /** - * Estimate the number of tokens a prompt will use. - * Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts - */ - private async getEstimatedTokenCountFromPrompt( - messages: BaseMessage[], - functions?: FunctionDefinition[], - function_call?: "none" | "auto" | FunctionCallOption - ): Promise { - // It appears that if functions are present, the first system message is padded with a trailing newline. This - // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were. - - let tokens = (await this.getNumTokensFromMessages(messages)).totalCount; - - // If there are functions, add the function definitions as they count towards token usage - if (functions && function_call !== "auto") { - const promptDefinitions = formatFunctionDefinitions( - functions as unknown as FunctionDef[] - ); - tokens += await this.getNumTokens(promptDefinitions); - tokens += 9; // Add nine per completion - } - - // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because - // functions typically add a system message, but reuse the first one if it's already there. This offsets - // the extra 9 tokens added by the function definitions. - if (functions && messages.find((m) => m._getType() === "system")) { - tokens -= 4; - } - - // If function_call is 'none', add one token. - // If it's a FunctionCall object, add 4 + the number of tokens in the function name. - // If it's undefined or 'auto', don't add anything. - if (function_call === "none") { - tokens += 1; - } else if (typeof function_call === "object") { - tokens += (await this.getNumTokens(function_call.name)) + 4; - } - - return tokens; - } - - async getNumTokensFromMessages(messages: BaseMessage[]) { - let totalCount = 0; - let tokensPerMessage = 0; - let tokensPerName = 0; - - // From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb - if (this.model === "gpt-3.5-turbo-0301") { - tokensPerMessage = 4; - tokensPerName = -1; - } else { - tokensPerMessage = 3; - tokensPerName = 1; - } - - const countPerMessage = await Promise.all( - messages.map(async (message) => { - const textCount = await this.getNumTokens(message.content); - const roleCount = await this.getNumTokens(messageToOpenAIRole(message)); - const nameCount = - message.name !== undefined - ? tokensPerName + (await this.getNumTokens(message.name)) - : 0; - let count = textCount + tokensPerMessage + roleCount + nameCount; - - // From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate - const openAIMessage = message; - if (openAIMessage._getType() === "function") { - count -= 2; - } - if (openAIMessage.additional_kwargs?.function_call) { - count += 3; - } - if (openAIMessage?.additional_kwargs.function_call?.name) { - count += await this.getNumTokens( - openAIMessage.additional_kwargs.function_call?.name - ); - } - if (openAIMessage.additional_kwargs.function_call?.arguments) { - count += await this.getNumTokens( - // Remove newlines and spaces - JSON.stringify( - JSON.parse( - openAIMessage.additional_kwargs.function_call?.arguments - ) - ) - ); - } - - totalCount += count; - return count; - }) - ); - - totalCount += 3; // every reply is primed with <|start|>assistant<|message|> - - return { totalCount, countPerMessage }; - } - - /** @ignore */ - _combineLLMOutput(...llmOutputs: OpenAILLMOutput[]): OpenAILLMOutput { - return llmOutputs.reduce<{ - [key in keyof OpenAILLMOutput]: Required; - }>( - (acc, llmOutput) => { - if (llmOutput && llmOutput.tokenUsage) { - acc.tokenUsage.completionTokens += - llmOutput.tokenUsage.completionTokens ?? 0; - acc.tokenUsage.promptTokens += llmOutput.tokenUsage.promptTokens ?? 0; - acc.tokenUsage.totalTokens += llmOutput.tokenUsage.totalTokens ?? 0; - } - return acc; - }, - { - tokenUsage: { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }, - } - ); - } -} diff --git a/libs/langchain-azure-openai/src/constants.ts b/libs/langchain-azure-openai/src/constants.ts deleted file mode 100644 index 9fee1c3806bd..000000000000 --- a/libs/langchain-azure-openai/src/constants.ts +++ /dev/null @@ -1 +0,0 @@ -export const USER_AGENT_PREFIX = "langchainjs-azure-openai"; diff --git a/libs/langchain-azure-openai/src/embeddings.ts b/libs/langchain-azure-openai/src/embeddings.ts deleted file mode 100644 index 4289d8f66192..000000000000 --- a/libs/langchain-azure-openai/src/embeddings.ts +++ /dev/null @@ -1,167 +0,0 @@ -import { Embeddings } from "@langchain/core/embeddings"; -import { - type OpenAIClientOptions as AzureOpenAIClientOptions, - OpenAIClient as AzureOpenAIClient, - AzureKeyCredential, - OpenAIKeyCredential, -} from "@azure/openai"; -import { - KeyCredential, - TokenCredential, - isTokenCredential, -} from "@azure/core-auth"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { chunkArray } from "@langchain/core/utils/chunk_array"; -import { AzureOpenAIInput, AzureOpenAIEmbeddingsParams } from "./types.js"; -import { USER_AGENT_PREFIX } from "./constants.js"; - -/** @deprecated Import from "@langchain/openai" instead. */ -export class AzureOpenAIEmbeddings - extends Embeddings - implements AzureOpenAIEmbeddingsParams, AzureOpenAIInput -{ - modelName = "text-embedding-ada-002"; - - model = "text-embedding-ada-002"; - - batchSize = 512; - - stripNewLines = false; - - timeout?: number; - - user?: string; - - azureOpenAIApiKey?: string; - - apiKey?: string; - - azureOpenAIEndpoint?: string; - - azureOpenAIApiDeploymentName?: string; - - private client: AzureOpenAIClient; - - constructor( - fields?: Partial & - Partial & { - configuration?: AzureOpenAIClientOptions; - } - ) { - const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; - - super(fieldsWithDefaults); - - this.azureOpenAIApiDeploymentName = - (fieldsWithDefaults?.azureOpenAIEmbeddingsApiDeploymentName || - fieldsWithDefaults?.azureOpenAIApiDeploymentName) ?? - (getEnvironmentVariable("AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME") || - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME")); - - this.azureOpenAIEndpoint = - fields?.azureOpenAIEndpoint ?? - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT"); - - const openAiApiKey = - fields?.apiKey ?? - fields?.openAIApiKey ?? - getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.apiKey ?? - fields?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? - openAiApiKey; - this.apiKey = this.azureOpenAIApiKey; - - const azureCredential = - fields?.credentials ?? - (this.apiKey === openAiApiKey - ? new OpenAIKeyCredential(this.apiKey ?? "") - : new AzureKeyCredential(this.apiKey ?? "")); - - // eslint-disable-next-line no-instanceof/no-instanceof - const isOpenAIApiKey = azureCredential instanceof OpenAIKeyCredential; - - if (!this.apiKey && !fields?.credentials) { - throw new Error("Azure OpenAI API key not found"); - } - - if (!this.azureOpenAIEndpoint && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Endpoint not found"); - } - - if (!this.azureOpenAIApiDeploymentName && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Deployment name not found"); - } - - this.modelName = - fieldsWithDefaults?.model ?? fieldsWithDefaults?.modelName ?? this.model; - this.model = this.modelName; - - this.batchSize = - fieldsWithDefaults?.batchSize ?? (this.apiKey ? 1 : this.batchSize); - - this.stripNewLines = - fieldsWithDefaults?.stripNewLines ?? this.stripNewLines; - - this.timeout = fieldsWithDefaults?.timeout; - - const options = { - userAgentOptions: { userAgentPrefix: USER_AGENT_PREFIX }, - }; - - if (isOpenAIApiKey) { - this.client = new AzureOpenAIClient( - azureCredential as OpenAIKeyCredential - ); - } else if (isTokenCredential(azureCredential)) { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as TokenCredential, - options - ); - } else { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as KeyCredential, - options - ); - } - } - - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray( - this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, - this.batchSize - ); - - const batchRequests = batches.map((batch) => this.getEmbeddings(batch)); - const embeddings = await Promise.all(batchRequests); - return embeddings.flat(); - } - - async embedQuery(document: string): Promise { - const input = [ - this.stripNewLines ? document.replace(/\n/g, " ") : document, - ]; - const embeddings = await this.getEmbeddings(input); - return embeddings.flat(); - } - - private async getEmbeddings(input: string[]): Promise { - const deploymentName = this.azureOpenAIApiDeploymentName || this.model; - - const res = await this.caller.call(() => - this.client.getEmbeddings(deploymentName, input, { - user: this.user, - model: this.model, - requestOptions: { - timeout: this.timeout, - }, - }) - ); - - return res.data.map((data) => data.embedding); - } -} diff --git a/libs/langchain-azure-openai/src/index.ts b/libs/langchain-azure-openai/src/index.ts deleted file mode 100644 index 20c13063eafb..000000000000 --- a/libs/langchain-azure-openai/src/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export * from "./llms.js"; -export * from "./chat_models.js"; -export * from "./embeddings.js"; -export * from "./types.js"; -export * from "./utils/openai-format-fndef.js"; diff --git a/libs/langchain-azure-openai/src/llms.ts b/libs/langchain-azure-openai/src/llms.ts deleted file mode 100644 index fefdab7527e2..000000000000 --- a/libs/langchain-azure-openai/src/llms.ts +++ /dev/null @@ -1,430 +0,0 @@ -import type { TiktokenModel } from "js-tiktoken/lite"; -import { - type OpenAIClientOptions as AzureOpenAIClientOptions, - OpenAIClient as AzureOpenAIClient, - AzureKeyCredential, - Completions, - Choice, - OpenAIKeyCredential, -} from "@azure/openai"; -import { calculateMaxTokens } from "@langchain/core/language_models/base"; -import { - BaseLLM, - type BaseLLMParams, -} from "@langchain/core/language_models/llms"; -import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { chunkArray } from "@langchain/core/utils/chunk_array"; -import { GenerationChunk, type LLMResult } from "@langchain/core/outputs"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - KeyCredential, - TokenCredential, - isTokenCredential, -} from "@azure/core-auth"; -import { AzureOpenAIInput, OpenAICallOptions, OpenAIInput } from "./types.js"; -import { USER_AGENT_PREFIX } from "./constants.js"; - -/** - * Interface for tracking token usage in OpenAI calls. - */ -export interface TokenUsage { - completionTokens?: number; - promptTokens?: number; - totalTokens?: number; -} - -/** @deprecated Import from "@langchain/openai" instead. */ -export class AzureOpenAI< - CallOptions extends OpenAICallOptions = OpenAICallOptions - > - extends BaseLLM - implements OpenAIInput, AzureOpenAIInput -{ - static lc_name() { - return "AzureOpenAI"; - } - - get callKeys() { - return [...super.callKeys, "options"]; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "AZURE_OPENAI_API_KEY", - openAIApiKey: "OPENAI_API_KEY", - azureOpenAIApiKey: "AZURE_OPENAI_API_KEY", - azureOpenAIEndpoint: "AZURE_OPENAI_API_ENDPOINT", - azureOpenAIApiDeploymentName: "AZURE_OPENAI_API_DEPLOYMENT_NAME", - }; - } - - get lc_aliases(): Record { - return { - openAIApiKey: "openai_api_key", - azureOpenAIApiKey: "azure_openai_api_key", - azureOpenAIEndpoint: "azure_openai_api_endpoint", - azureOpenAIApiDeploymentName: "azure_openai_api_deployment_name", - }; - } - - temperature = 0.7; - - maxTokens = 256; - - topP = 1; - - frequencyPenalty = 0; - - presencePenalty = 0; - - n = 1; - - bestOf?: number; - - logitBias?: Record; - - model = "gpt-3.5-turbo-instruct"; - - modelKwargs?: OpenAIInput["modelKwargs"]; - - batchSize = 20; - - timeout?: number; - - stop?: string[]; - - stopSequences?: string[]; - - user?: string; - - streaming = false; - - azureOpenAIApiKey?: string; - - apiKey?: string; - - azureOpenAIEndpoint?: string; - - azureOpenAIApiDeploymentName?: string; - - logprobs?: number; - - echo?: boolean; - - private client: AzureOpenAIClient; - - constructor( - fields?: Partial & - Partial & - BaseLLMParams & { - configuration?: AzureOpenAIClientOptions; - } - ) { - super(fields ?? {}); - - this.azureOpenAIEndpoint = - fields?.azureOpenAIEndpoint ?? - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT"); - - this.azureOpenAIApiDeploymentName = - fields?.azureOpenAIApiDeploymentName ?? - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME"); - - const openAiApiKey = - fields?.apiKey ?? - fields?.openAIApiKey ?? - getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.apiKey ?? - fields?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? - openAiApiKey; - this.apiKey = this.azureOpenAIApiKey; - - const azureCredential = - fields?.credentials ?? - (this.apiKey === openAiApiKey - ? new OpenAIKeyCredential(this.apiKey ?? "") - : new AzureKeyCredential(this.apiKey ?? "")); - - // eslint-disable-next-line no-instanceof/no-instanceof - const isOpenAIApiKey = azureCredential instanceof OpenAIKeyCredential; - - if (!this.apiKey && !fields?.credentials) { - throw new Error("Azure OpenAI API key not found"); - } - - if (!this.azureOpenAIEndpoint && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Endpoint not found"); - } - - if (!this.azureOpenAIApiDeploymentName && !isOpenAIApiKey) { - throw new Error("Azure OpenAI Deployment name not found"); - } - - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.logitBias = fields?.logitBias; - this.user = fields?.user; - this.n = fields?.n ?? this.n; - this.logprobs = fields?.logprobs; - this.echo = fields?.echo; - this.stop = fields?.stopSequences ?? fields?.stop; - this.stopSequences = this.stop; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.bestOf = fields?.bestOf ?? this.bestOf; - this.model = fields?.model ?? this.model; - this.modelKwargs = fields?.modelKwargs ?? {}; - this.streaming = fields?.streaming ?? false; - this.batchSize = fields?.batchSize ?? this.batchSize; - - if (this.streaming && this.bestOf && this.bestOf > 1) { - throw new Error("Cannot stream results when bestOf > 1"); - } - - const options = { - userAgentOptions: { userAgentPrefix: USER_AGENT_PREFIX }, - }; - - if (isOpenAIApiKey) { - this.client = new AzureOpenAIClient( - azureCredential as OpenAIKeyCredential - ); - } else if (isTokenCredential(azureCredential)) { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as TokenCredential, - options - ); - } else { - this.client = new AzureOpenAIClient( - this.azureOpenAIEndpoint ?? "", - azureCredential as KeyCredential, - options - ); - } - } - - async *_streamResponseChunks( - input: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const deploymentName = this.azureOpenAIApiDeploymentName || this.model; - - const stream = await this.caller.call(() => - this.client.streamCompletions(deploymentName, [input], { - maxTokens: this.maxTokens, - temperature: this.temperature, - topP: this.topP, - logitBias: this.logitBias, - user: this.user, - n: this.n, - logprobs: this.logprobs, - echo: this.echo, - stop: this.stopSequences, - presencePenalty: this.presencePenalty, - frequencyPenalty: this.frequencyPenalty, - bestOf: this.bestOf, - requestOptions: { - timeout: options?.timeout ?? this.timeout, - }, - abortSignal: options?.signal ?? undefined, - ...this.modelKwargs, - }) - ); - - for await (const data of stream) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - const chunk = new GenerationChunk({ - text: choice.text, - generationInfo: { - finishReason: choice.finishReason, - }, - }); - yield chunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(chunk.text ?? ""); - } - - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - async _generate( - prompts: string[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const deploymentName = this.azureOpenAIApiDeploymentName || this.model; - - if (this.maxTokens === -1) { - if (prompts.length !== 1) { - throw new Error( - "max_tokens set to -1 not supported for multiple inputs" - ); - } - this.maxTokens = await calculateMaxTokens({ - prompt: prompts[0], - // Cast here to allow for other models that may not fit the union - modelName: this.model as TiktokenModel, - }); - } - - const subPrompts = chunkArray(prompts, this.batchSize); - - if (this.streaming) { - const choices: Choice[] = []; - - for (let i = 0; i < subPrompts.length; i += 1) { - let response: Omit | undefined; - - const stream = await this.caller.call(() => - this.client.streamCompletions(deploymentName, subPrompts[i], { - maxTokens: this.maxTokens, - temperature: this.temperature, - topP: this.topP, - logitBias: this.logitBias, - user: this.user, - n: this.n, - logprobs: this.logprobs, - echo: this.echo, - stop: this.stopSequences, - presencePenalty: this.presencePenalty, - frequencyPenalty: this.frequencyPenalty, - bestOf: this.bestOf, - requestOptions: { - timeout: options?.timeout ?? this.timeout, - }, - abortSignal: options?.signal ?? undefined, - ...this.modelKwargs, - }) - ); - for await (const message of stream) { - if (!response) { - response = { - id: message.id, - created: message.created, - promptFilterResults: message.promptFilterResults, - }; - } - - // on all messages, update choice - for (const part of message.choices) { - if (!choices[part.index]) { - choices[part.index] = part; - } else { - const choice = choices[part.index]; - choice.text += part.text; - choice.finishReason = part.finishReason; - choice.logprobs = part.logprobs; - } - void runManager?.handleLLMNewToken(part.text, { - prompt: Math.floor(part.index / this.n), - completion: part.index % this.n, - }); - } - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - const generations = chunkArray(choices, this.n).map((promptChoices) => - promptChoices.map((choice) => ({ - text: choice.text ?? "", - generationInfo: { - finishReason: choice.finishReason, - logprobs: choice.logprobs, - }, - })) - ); - return { - generations, - llmOutput: { - tokenUsage: { - completionTokens: undefined, - promptTokens: undefined, - totalTokens: undefined, - }, - }, - }; - } else { - const tokenUsage: TokenUsage = {}; - const subPrompts = chunkArray(prompts, this.batchSize); - const choices: Choice[] = []; - - for (let i = 0; i < subPrompts.length; i += 1) { - const data = await this.caller.call(() => - this.client.getCompletions(deploymentName, prompts, { - maxTokens: this.maxTokens, - temperature: this.temperature, - topP: this.topP, - logitBias: this.logitBias, - user: this.user, - n: this.n, - logprobs: this.logprobs, - echo: this.echo, - stop: this.stopSequences, - presencePenalty: this.presencePenalty, - frequencyPenalty: this.frequencyPenalty, - bestOf: this.bestOf, - requestOptions: { - timeout: options?.timeout ?? this.timeout, - }, - abortSignal: options?.signal ?? undefined, - ...this.modelKwargs, - }) - ); - - choices.push(...data.choices); - - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + data.usage.completionTokens; - tokenUsage.promptTokens = - (tokenUsage.promptTokens ?? 0) + data.usage.promptTokens; - tokenUsage.totalTokens = - (tokenUsage.totalTokens ?? 0) + data.usage.totalTokens; - } - - const generations = chunkArray(choices, this.n).map((promptChoices) => - promptChoices.map((choice) => { - void runManager?.handleLLMNewToken(choice.text, { - prompt: Math.floor(choice.index / this.n), - completion: choice.index % this.n, - }); - return { - text: choice.text ?? "", - generationInfo: { - finishReason: choice.finishReason, - logprobs: choice.logprobs, - }, - }; - }) - ); - - return { - generations, - llmOutput: { - tokenUsage: { - completionTokens: tokenUsage.completionTokens, - promptTokens: tokenUsage.promptTokens, - totalTokens: tokenUsage.totalTokens, - }, - }, - }; - } - } - - _llmType() { - return "azure_openai"; - } -} diff --git a/libs/langchain-azure-openai/src/tests/chat_models-extended.int.test.ts b/libs/langchain-azure-openai/src/tests/chat_models-extended.int.test.ts deleted file mode 100644 index 1e4aebc039d7..000000000000 --- a/libs/langchain-azure-openai/src/tests/chat_models-extended.int.test.ts +++ /dev/null @@ -1,212 +0,0 @@ -import { test, expect, jest } from "@jest/globals"; -import { HumanMessage, ToolMessage } from "@langchain/core/messages"; -import { InMemoryCache } from "@langchain/core/caches"; -import { AzureChatOpenAI } from "../chat_models.js"; - -test("Test ChatOpenAI JSON mode", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).withConfig({ - response_format: { - type: "json_object", - }, - }); - const message = new HumanMessage("Hello!"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.invoke([["system", "Only return JSON"], message]); - // console.log(JSON.stringify(res)); -}); - -test("Test ChatOpenAI seed", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - maxTokens: 128, - temperature: 1, - }).withConfig({ - seed: 123454930394983, - }); - const message = new HumanMessage("Say something random!"); - const res = await chat.invoke([message]); - // console.log(JSON.stringify(res)); - const res2 = await chat.invoke([message]); - expect(res).toEqual(res2); -}); - -test("Test ChatOpenAI tool calling", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).withConfig({ - tools: [ - { - type: "function", - function: { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const res = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - // console.log(JSON.stringify(res)); - expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); -}); - -test("Test ChatOpenAI tool calling with ToolMessages", async () => { - function getCurrentWeather(location: string) { - if (location.toLowerCase().includes("tokyo")) { - return JSON.stringify({ location, temperature: "10", unit: "celsius" }); - } else if (location.toLowerCase().includes("san francisco")) { - return JSON.stringify({ - location, - temperature: "72", - unit: "fahrenheit", - }); - } else { - return JSON.stringify({ location, temperature: "22", unit: "celsius" }); - } - } - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).withConfig({ - tools: [ - { - type: "function", - function: { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const res = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - // console.log(JSON.stringify(res)); - expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const toolMessages = res.additional_kwargs.tool_calls!.map( - (toolCall) => - new ToolMessage({ - tool_call_id: toolCall.id, - name: toolCall.function.name, - content: getCurrentWeather( - JSON.parse(toolCall.function.arguments).location - ), - }) - ); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const finalResponse = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - res, - ...toolMessages, - ]); - // console.log(finalResponse); -}); - -test("Test ChatOpenAI tool calling with streaming", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - maxTokens: 256, - }).withConfig({ - tools: [ - { - type: "function", - function: { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const stream = await chat.stream([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - let finalChunk; - const chunks = []; - for await (const chunk of stream) { - // console.log(chunk.additional_kwargs.tool_calls); - chunks.push(chunk); - if (!finalChunk) { - finalChunk = chunk; - } else { - finalChunk = finalChunk.concat(chunk); - } - } - expect(chunks.length).toBeGreaterThan(1); - // console.log(finalChunk?.additional_kwargs.tool_calls); - expect(finalChunk?.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); -}); - -test("ChatOpenAI in JSON mode can cache generations", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo-1106", - temperature: 1, - cache: memoryCache, - }).withConfig({ - response_format: { - type: "json_object", - }, - }); - const message = new HumanMessage( - "Respond with a JSON object containing arbitrary fields." - ); - const res = await chat.invoke([message]); - // console.log(res); - - const res2 = await chat.invoke([message]); - // console.log(res2); - - expect(res).toEqual(res2); - - expect(lookupSpy).toHaveBeenCalledTimes(2); - expect(updateSpy).toHaveBeenCalledTimes(1); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); diff --git a/libs/langchain-azure-openai/src/tests/chat_models-vision.int.test.ts b/libs/langchain-azure-openai/src/tests/chat_models-vision.int.test.ts deleted file mode 100644 index 35c6f385e908..000000000000 --- a/libs/langchain-azure-openai/src/tests/chat_models-vision.int.test.ts +++ /dev/null @@ -1,58 +0,0 @@ -import { test } from "@jest/globals"; -import { HumanMessage } from "@langchain/core/messages"; -import * as fs from "node:fs/promises"; -import { fileURLToPath } from "node:url"; -import * as path from "node:path"; -import { AzureChatOpenAI } from "../chat_models.js"; - -test("Test ChatOpenAI with a file", async () => { - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const imageData = await fs.readFile(path.join(__dirname, "/data/hotdog.jpg")); - const chat = new AzureChatOpenAI({ - model: "gpt-4-vision-preview", - maxTokens: 1024, - }); - const message = new HumanMessage({ - content: [ - { - type: "text", - text: "What's in this image?", - }, - { - type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageData.toString("base64")}`, - }, - }, - ], - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.invoke([message]); - // console.log({ res }); -}); - -test("Test ChatOpenAI with a URL", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-4-vision-preview", - maxTokens: 1024, - }); - const message = new HumanMessage({ - content: [ - { - type: "text", - text: "What does this image say?", - }, - { - type: "image_url", - image_url: - "https://www.freecodecamp.org/news/content/images/2023/05/Screenshot-2023-05-29-at-5.40.38-PM.png", - }, - ], - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.invoke([message]); - // console.log({ res }); -}); diff --git a/libs/langchain-azure-openai/src/tests/chat_models.int.test.ts b/libs/langchain-azure-openai/src/tests/chat_models.int.test.ts deleted file mode 100644 index 57e02e297623..000000000000 --- a/libs/langchain-azure-openai/src/tests/chat_models.int.test.ts +++ /dev/null @@ -1,852 +0,0 @@ -import { test, jest, expect } from "@jest/globals"; -import { - BaseMessage, - ChatMessage, - HumanMessage, - SystemMessage, -} from "@langchain/core/messages"; -import { ChatGeneration, LLMResult } from "@langchain/core/outputs"; -import { ChatPromptValue } from "@langchain/core/prompt_values"; -import { - PromptTemplate, - ChatPromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "@langchain/core/prompts"; -import { CallbackManager } from "@langchain/core/callbacks/manager"; -import { NewTokenIndices } from "@langchain/core/callbacks/base"; -import { InMemoryCache } from "@langchain/core/caches"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { OpenAIKeyCredential } from "@azure/openai"; -import { AzureChatOpenAI } from "../chat_models.js"; - -test("Test ChatOpenAI", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - }); - const message = new HumanMessage("Hello!"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.call([message]); - // console.log({ res }); -}); - -test("Test ChatOpenAI with SystemChatMessage", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - }); - const system_message = new SystemMessage("You are to chat with a user."); - const message = new HumanMessage("Hello!"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.call([system_message, message]); - // console.log({ res }); -}); - -test("Test ChatOpenAI Generate", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - for (const generation of res.generations) { - expect(generation.length).toBe(2); - for (const message of generation) { - // console.log(message.text); - expect(typeof message.text).toBe("string"); - } - } - // console.log({ res }); -}); - -test("Test ChatOpenAI Generate throws when one of the calls fails", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - await expect(() => - chat.generate([[message], [message]], { - signal: AbortSignal.timeout(10), - }) - ).rejects.toThrow(); -}); - -test("Test ChatOpenAI tokenUsage", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - // console.log(output); - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const message = new HumanMessage("Hello"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke([message]); - // console.log({ res }); - - expect(tokenUsage.promptTokens).toBeGreaterThan(0); -}); - -test("Test ChatOpenAI tokenUsage with a batch", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new AzureChatOpenAI({ - temperature: 0, - model: "gpt-3.5-turbo", - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.generate([ - [new HumanMessage("Hello")], - [new HumanMessage("Hi")], - ]); - // console.log(res); - - expect(tokenUsage.promptTokens).toBeGreaterThan(0); -}); - -test("Test ChatOpenAI in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: true, - maxTokens: 10, - callbacks: [ - { - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }, - ], - }); - const message = new HumanMessage("Hello!"); - const result = await model.invoke([message]); - - expect(nrNewTokens > 0).toBe(true); - expect(result.content).toBe(streamedCompletion); -}, 10000); - -test("Test ChatOpenAI in streaming mode with n > 1 and multiple prompts", async () => { - let nrNewTokens = 0; - const streamedCompletions = [ - ["", ""], - ["", ""], - ]; - - const model = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: true, - maxTokens: 10, - n: 2, - callbacks: [ - { - async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - streamedCompletions[idx.prompt][idx.completion] += token; - }, - }, - ], - }); - const message1 = new HumanMessage("Hello!"); - const message2 = new HumanMessage("Bye!"); - const result = await model.generate([[message1], [message2]]); - - expect(nrNewTokens > 0).toBe(true); - expect(result.generations.map((g) => g.map((gg) => gg.text))).toEqual( - streamedCompletions - ); -}, 10000); - -test("Test ChatOpenAI prompt value", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generatePrompt([new ChatPromptValue([message])]); - expect(res.generations.length).toBe(1); - for (const generation of res.generations) { - expect(generation.length).toBe(2); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for (const g of generation) { - // console.log(g.text); - } - } - // console.log({ res }); -}); - -test("OpenAI Chat, docs, prompt templates", async () => { - const chat = new AzureChatOpenAI({ temperature: 0, maxTokens: 10 }); - - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant that translates {input_language} to {output_language}." - ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - input_language: "English", - output_language: "French", - text: "I love programming.", - }), - ]); - - // console.log(responseA.generations); -}, 5000); - -test("Test OpenAI with stop", async () => { - const model = new AzureChatOpenAI({ maxTokens: 5 }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.call( - [new HumanMessage("Print hello world")], - ["world"] - ); - // console.log({ res }); -}); - -test("Test OpenAI with stop in object", async () => { - const model = new AzureChatOpenAI({ maxTokens: 5 }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke([new HumanMessage("Print hello world")], { - stop: ["world"], - }); - // console.log({ res }); -}); - -test("Test OpenAI with timeout in call options", async () => { - const model = new AzureChatOpenAI({ maxTokens: 5 }); - await expect(() => - model.invoke([new HumanMessage("Print hello world")], { timeout: 10 }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with timeout in call options and node adapter", async () => { - const model = new AzureChatOpenAI({ maxTokens: 5 }); - await expect(() => - model.invoke([new HumanMessage("Print hello world")], { timeout: 10 }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options", async () => { - const model = new AzureChatOpenAI({ maxTokens: 5 }); - const controller = new AbortController(); - await expect(() => { - const ret = model.invoke([new HumanMessage("Print hello world")], { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options and node adapter", async () => { - const model = new AzureChatOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - const controller = new AbortController(); - await expect(() => { - const ret = model.invoke([new HumanMessage("Print hello world")], { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with specific roles in ChatMessage", async () => { - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - }); - const system_message = new ChatMessage( - "You are to chat with a user.", - "system" - ); - const user_message = new ChatMessage("Hello!", "user"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.call([system_message, user_message]); - // console.log({ res }); -}); - -test("Test ChatOpenAI stream method", async () => { - const model = new AzureChatOpenAI({ - maxTokens: 50, - model: "gpt-3.5-turbo", - }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - // console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test ChatOpenAI stream method with abort", async () => { - await expect(async () => { - const model = new AzureChatOpenAI({ - maxTokens: 100, - model: "gpt-3.5-turbo", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose.", - { - signal: AbortSignal.timeout(500), - } - ); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for await (const chunk of stream) { - // console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test ChatOpenAI stream method with early break", async () => { - const model = new AzureChatOpenAI({ - maxTokens: 50, - model: "gpt-3.5-turbo", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - let i = 0; - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for await (const chunk of stream) { - // console.log(chunk); - i += 1; - if (i > 10) { - break; - } - } -}); - -test("Test ChatOpenAI stream method, timeout error thrown from SDK", async () => { - await expect(async () => { - const model = new AzureChatOpenAI({ - maxTokens: 50, - model: "gpt-3.5-turbo", - timeout: 1, - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for await (const chunk of stream) { - // console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Function calling with streaming", async () => { - let finalResult: BaseMessage | undefined; - const modelForFunctionCalling = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - temperature: 0, - callbacks: [ - { - handleLLMEnd(output: LLMResult) { - finalResult = (output.generations[0][0] as ChatGeneration).message; - }, - }, - ], - }); - - const stream = await modelForFunctionCalling.stream( - "What is the weather in New York?", - { - functions: [ - { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - ], - function_call: { - name: "get_current_weather", - }, - } - ); - - const chunks = []; - let streamedOutput; - for await (const chunk of stream) { - chunks.push(chunk); - if (!streamedOutput) { - streamedOutput = chunk; - } else if (chunk) { - streamedOutput = streamedOutput.concat(chunk); - } - } - - expect(finalResult).toEqual(streamedOutput); - expect(chunks.length).toBeGreaterThan(1); - expect(finalResult?.additional_kwargs?.function_call?.name).toBe( - "get_current_weather" - ); - // console.log( - // JSON.parse(finalResult?.additional_kwargs?.function_call?.arguments ?? "") - // .location - // ); -}); - -test("ChatOpenAI can cache generations", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - cache: memoryCache, - }); - const message = new HumanMessage("Hello"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - - expect(lookupSpy).toHaveBeenCalledTimes(2); - expect(updateSpy).toHaveBeenCalledTimes(2); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -test("ChatOpenAI can write and read cached generations", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 100, - n: 1, - cache: memoryCache, - }); - const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); - - const messages = [ - [ - new HumanMessage("what color is the sky?"), - new HumanMessage("what color is the ocean?"), - ], - [new HumanMessage("hello")], - ]; - - const response1 = await chat.generate(messages); - expect(generateUncachedSpy).toHaveBeenCalledTimes(1); - generateUncachedSpy.mockRestore(); - - const response2 = await chat.generate(messages); - expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should be cached, no need to generate. - generateUncachedSpy.mockRestore(); - - expect(response1.generations.length).toBe(2); - expect(response2.generations).toEqual(response1.generations); - expect(lookupSpy).toHaveBeenCalledTimes(4); - expect(updateSpy).toHaveBeenCalledTimes(2); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -test("ChatOpenAI should not reuse cache if function call args have changed", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 100, - n: 1, - cache: memoryCache, - }); - - const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); - - const messages = [ - [ - new HumanMessage("what color is the sky?"), - new HumanMessage("what color is the ocean?"), - ], - [new HumanMessage("hello")], - ]; - - const response1 = await chat.generate(messages); - expect(generateUncachedSpy).toHaveBeenCalledTimes(1); - generateUncachedSpy.mockRestore(); - - const response2 = await chat.generate(messages, { - functions: [ - { - name: "extractor", - description: "Extract fields from the input", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - description: "the tone of the input", - }, - }, - required: ["tone"], - }, - }, - ], - function_call: { - name: "extractor", - }, - }); - - expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should not be cached since it's being called with different function call args - - expect(response1.generations.length).toBe(2); - expect( - (response2.generations[0][0] as ChatGeneration).message.additional_kwargs - .function_call?.name ?? "" - ).toEqual("extractor"); - - const response3 = await chat.generate(messages, { - functions: [ - { - name: "extractor", - description: "Extract fields from the input", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - description: "the tone of the input", - }, - }, - required: ["tone"], - }, - }, - ], - function_call: { - name: "extractor", - }, - }); - - expect(response2.generations).toEqual(response3.generations); - - expect(lookupSpy).toHaveBeenCalledTimes(6); - expect(updateSpy).toHaveBeenCalledTimes(4); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -function createSampleMessages(): BaseMessage[] { - // same example as in https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb - return [ - createSystemChatMessage( - "You are a helpful, pattern-following assistant that translates corporate jargon into plain English." - ), - createSystemChatMessage( - "New synergies will help drive top-line growth.", - "example_user" - ), - createSystemChatMessage( - "Things working well together will increase revenue.", - "example_assistant" - ), - createSystemChatMessage( - "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", - "example_user" - ), - createSystemChatMessage( - "Let's talk later when we're less busy about how to do better.", - "example_assistant" - ), - new HumanMessage( - "This late pivot means we don't have time to boil the ocean for the client deliverable." - ), - ]; -} - -function createSystemChatMessage(text: string, name?: string) { - const msg = new SystemMessage(text); - msg.name = name; - return msg; -} - -test("getNumTokensFromMessages gpt-3.5-turbo-0301 model for sample input", async () => { - const messages: BaseMessage[] = createSampleMessages(); - - const chat = new AzureChatOpenAI({ - azureOpenAIApiKey: "dummy", - model: "gpt-3.5-turbo-0301", - }); - - const { totalCount } = await chat.getNumTokensFromMessages(messages); - - expect(totalCount).toBe(127); -}); - -test("getNumTokensFromMessages gpt-4-0314 model for sample input", async () => { - const messages: BaseMessage[] = createSampleMessages(); - - const chat = new AzureChatOpenAI({ - azureOpenAIApiKey: "dummy", - model: "gpt-4-0314", - }); - - const { totalCount } = await chat.getNumTokensFromMessages(messages); - - expect(totalCount).toBe(129); -}); - -test("Test ChatOpenAI token usage reporting for streaming function calls", async () => { - let streamingTokenUsed = -1; - let nonStreamingTokenUsed = -1; - - const humanMessage = "What a beautiful day!"; - const extractionFunctionSchema = { - name: "extractor", - description: "Extracts fields from the input.", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - enum: ["positive", "negative"], - description: "The overall tone of the input", - }, - word_count: { - type: "number", - description: "The number of words in the input", - }, - chat_response: { - type: "string", - description: "A response to the human's input", - }, - }, - required: ["tone", "word_count", "chat_response"], - }, - }; - - const streamingModel = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: true, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - streamingTokenUsed = - output.llmOutput?.estimatedTokenUsage?.totalTokens; - // console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); - }, - handleLLMError: async (_err) => { - // console.error(err); - }, - }, - ], - }).withConfig({ - functions: [extractionFunctionSchema], - function_call: { name: "extractor" }, - }); - - const nonStreamingModel = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: false, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; - // console.log("non-streaming usage", output.llmOutput?.tokenUsage); - }, - handleLLMError: async (_err) => { - // console.error(err); - }, - }, - ], - }).withConfig({ - functions: [extractionFunctionSchema], - function_call: { name: "extractor" }, - }); - - const [nonStreamingResult, streamingResult] = await Promise.all([ - nonStreamingModel.invoke([new HumanMessage(humanMessage)]), - streamingModel.invoke([new HumanMessage(humanMessage)]), - ]); - - if ( - nonStreamingResult.additional_kwargs.function_call?.arguments && - streamingResult.additional_kwargs.function_call?.arguments - ) { - const nonStreamingArguments = JSON.stringify( - JSON.parse(nonStreamingResult.additional_kwargs.function_call.arguments) - ); - const streamingArguments = JSON.stringify( - JSON.parse(streamingResult.additional_kwargs.function_call.arguments) - ); - if (nonStreamingArguments === streamingArguments) { - expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); - } - } - - expect(streamingTokenUsed).toBeGreaterThan(-1); -}); - -test("Test ChatOpenAI token usage reporting for streaming calls", async () => { - let streamingTokenUsed = -1; - let nonStreamingTokenUsed = -1; - const systemPrompt = "You are a helpful assistant"; - const question = "What is the color of the night sky?"; - - const streamingModel = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: true, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - streamingTokenUsed = - output.llmOutput?.estimatedTokenUsage?.totalTokens; - // console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); - }, - handleLLMError: async (_err) => { - // console.error(err); - }, - }, - ], - }); - - const nonStreamingModel = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - streaming: false, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; - // console.log("non-streaming usage", output.llmOutput?.estimated); - }, - handleLLMError: async (_err) => { - // console.error(err); - }, - }, - ], - }); - - const [nonStreamingResult, streamingResult] = await Promise.all([ - nonStreamingModel.generate([ - [new SystemMessage(systemPrompt), new HumanMessage(question)], - ]), - streamingModel.generate([ - [new SystemMessage(systemPrompt), new HumanMessage(question)], - ]), - ]); - - expect(streamingTokenUsed).toBeGreaterThan(-1); - if ( - nonStreamingResult.generations[0][0].text === - streamingResult.generations[0][0].text - ) { - expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); - } -}); - -test("Test Azure ChatOpenAI with key credentials ", async () => { - const model = new AzureChatOpenAI({ - maxTokens: 5, - model: "davinci-002", - azureOpenAIApiKey: getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "", - azureOpenAIEndpoint: - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT") ?? "", - azureOpenAIApiDeploymentName: - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME") ?? "", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world"); - // console.log({ res }); -}); - -test("Test ChatOpenAI with OpenAI API key credentials", async () => { - const openAiKey: string = getEnvironmentVariable("OPENAI_API_KEY") ?? ""; - const credentials = new OpenAIKeyCredential(openAiKey); - - const chat = new AzureChatOpenAI({ - model: "gpt-3.5-turbo", - maxTokens: 5, - credentials, - azureOpenAIEndpoint: "", - azureOpenAIApiDeploymentName: "", - }); - const message = new HumanMessage("Hello!"); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await chat.invoke([["system", "Say hi"], message]); - // console.log(res); -}); diff --git a/libs/langchain-azure-openai/src/tests/data/hotdog.jpg b/libs/langchain-azure-openai/src/tests/data/hotdog.jpg deleted file mode 100644 index dfab265903be..000000000000 Binary files a/libs/langchain-azure-openai/src/tests/data/hotdog.jpg and /dev/null differ diff --git a/libs/langchain-azure-openai/src/tests/embeddings.int.test.ts b/libs/langchain-azure-openai/src/tests/embeddings.int.test.ts deleted file mode 100644 index ad2c23dd2147..000000000000 --- a/libs/langchain-azure-openai/src/tests/embeddings.int.test.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { ClientSecretCredential, TokenCredential } from "@azure/identity"; -import { OpenAIKeyCredential } from "@azure/openai"; -import { AzureOpenAIEmbeddings } from "../embeddings.js"; - -test("Test OpenAIEmbeddings.embedQuery", async () => { - const embeddings = new AzureOpenAIEmbeddings(); - const res = await embeddings.embedQuery("Hello world"); - expect(typeof res[0]).toBe("number"); -}); - -test("Test OpenAIEmbeddings.embedDocuments", async () => { - const embeddings = new AzureOpenAIEmbeddings(); - const res = await embeddings.embedDocuments(["Hello world", "Bye bye"]); - expect(res).toHaveLength(2); - expect(typeof res[0][0]).toBe("number"); - expect(typeof res[1][0]).toBe("number"); -}); - -test("Test OpenAIEmbeddings.embedDocuments batching", async () => { - const embeddings = new AzureOpenAIEmbeddings({ - batchSize: 16, - }); - const res = await embeddings.embedDocuments(["Hello world", "Bye bye"]); - expect(res).toHaveLength(2); - expect(typeof res[0][0]).toBe("number"); - expect(typeof res[1][0]).toBe("number"); -}); - -test("Test OpenAIEmbeddings concurrency", async () => { - const embeddings = new AzureOpenAIEmbeddings({ - batchSize: 1, - maxConcurrency: 2, - }); - const res = await embeddings.embedDocuments([ - "Hello world", - "Bye bye", - "Hello world", - "Bye bye", - "Hello world", - "Bye bye", - ]); - expect(res).toHaveLength(6); - expect(res.find((embedding) => typeof embedding[0] !== "number")).toBe( - undefined - ); -}); - -test("Test timeout error thrown from SDK", async () => { - await expect(async () => { - const model = new AzureOpenAIEmbeddings({ - timeout: 1, - }); - await model.embedDocuments([ - "Hello world", - "Bye bye", - "Hello world", - "Bye bye", - "Hello world", - "Bye bye", - ]); - }).rejects.toThrow(); -}); - -test("Test OpenAIEmbeddings.embedQuery with TokenCredentials", async () => { - const tenantId: string = getEnvironmentVariable("AZURE_TENANT_ID") ?? ""; - const clientId: string = getEnvironmentVariable("AZURE_CLIENT_ID") ?? ""; - const clientSecret: string = - getEnvironmentVariable("AZURE_CLIENT_SECRET") ?? ""; - - const credentials: TokenCredential = new ClientSecretCredential( - tenantId, - clientId, - clientSecret - ); - - const embeddings = new AzureOpenAIEmbeddings({ credentials }); - const res = await embeddings.embedQuery("Hello world"); - expect(typeof res[0]).toBe("number"); -}); - -test("Test OpenAIEmbeddings.embedQuery with key credentials ", async () => { - const embeddings = new AzureOpenAIEmbeddings({ - modelName: "text-embedding-ada-002", - azureOpenAIApiKey: getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "", - azureOpenAIEndpoint: - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT") ?? "", - azureOpenAIApiDeploymentName: - getEnvironmentVariable("AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME") ?? - "", - }); - const res = await embeddings.embedQuery("Hello world"); - expect(typeof res[0]).toBe("number"); -}); - -test("Test OpenAIEmbeddings.embedQuery with OpenAI API key credentials", async () => { - const openAiKey: string = getEnvironmentVariable("OPENAI_API_KEY") ?? ""; - const credentials = new OpenAIKeyCredential(openAiKey); - - const embeddings = new AzureOpenAIEmbeddings({ - credentials, - azureOpenAIEndpoint: "", - azureOpenAIApiDeploymentName: "", - }); - const res = await embeddings.embedQuery("Hello world"); - expect(typeof res[0]).toBe("number"); -}); diff --git a/libs/langchain-azure-openai/src/tests/llms.int.test.ts b/libs/langchain-azure-openai/src/tests/llms.int.test.ts deleted file mode 100644 index bb0ff449a3ca..000000000000 --- a/libs/langchain-azure-openai/src/tests/llms.int.test.ts +++ /dev/null @@ -1,381 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { LLMResult } from "@langchain/core/outputs"; -import { StringPromptValue } from "@langchain/core/prompt_values"; -import { CallbackManager } from "@langchain/core/callbacks/manager"; -import { NewTokenIndices } from "@langchain/core/callbacks/base"; -import { ClientSecretCredential } from "@azure/identity"; -import { TokenCredential } from "@azure/core-auth"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { OpenAIKeyCredential } from "@azure/openai"; -import { AzureOpenAI } from "../llms.js"; - -test("Test OpenAI", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world"); - // console.log({ res }); -}); - -test("Test OpenAI with stop", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.call("Print hello world", ["world"]); - // console.log({ res }); -}); - -test("Test OpenAI with stop in object", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world", { stop: ["world"] }); - // console.log({ res }); -}); - -test("Test OpenAI with timeout in call options", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - await expect(() => - model.invoke("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with timeout in call options and node adapter", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - await expect(() => - model.invoke("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - const controller = new AbortController(); - await expect(() => { - const ret = model.invoke("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options and node adapter", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - const controller = new AbortController(); - await expect(() => { - const ret = model.invoke("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with concurrency == 1", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - maxConcurrency: 1, - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await Promise.all([ - model.invoke("Print hello world"), - model.invoke("Print hello world"), - ]); - // console.log({ res }); -}); - -test("Test OpenAI with maxTokens -1", async () => { - const model = new AzureOpenAI({ - maxTokens: -1, - model: "gpt-3.5-turbo-instruct", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.call("Print hello world", ["world"]); - // console.log({ res }); -}); - -test("Test OpenAI with instruct model returns OpenAI", async () => { - const model = new AzureOpenAI({ model: "gpt-3.5-turbo-instruct" }); - expect(model).toBeInstanceOf(AzureOpenAI); - const res = await model.invoke("Print hello world"); - // console.log({ res }); - expect(typeof res).toBe("string"); -}); - -test("Test OpenAI with versioned instruct model returns OpenAI", async () => { - const model = new AzureOpenAI({ - model: "gpt-3.5-turbo-instruct-0914", - }); - expect(model).toBeInstanceOf(AzureOpenAI); - const res = await model.invoke("Print hello world"); - // console.log({ res }); - expect(typeof res).toBe("string"); -}); - -test("Test ChatOpenAI tokenUsage", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Hello"); - // console.log({ res }); - - expect(tokenUsage.promptTokens).toBe(1); -}); - -test("Test OpenAI in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - streaming: true, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }), - }); - const res = await model.invoke("Print hello world"); - // console.log({ res }); - - expect(nrNewTokens > 0).toBe(true); - expect(res).toBe(streamedCompletion); -}); - -test("Test OpenAI in streaming mode with multiple prompts", async () => { - let nrNewTokens = 0; - const completions = [ - ["", ""], - ["", ""], - ]; - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - streaming: true, - n: 2, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - completions[idx.prompt][idx.completion] += token; - }, - }), - }); - const res = await model.generate(["Print hello world", "print hello sea"]); - // console.log( - // res.generations, - // res.generations.map((g) => g[0].generationInfo) - // ); - - expect(nrNewTokens > 0).toBe(true); - expect(res.generations.length).toBe(2); - expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( - completions - ); -}); - -test("Test OpenAIChat in streaming mode with multiple prompts", async () => { - let nrNewTokens = 0; - const completions = [[""], [""]]; - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo", - streaming: true, - n: 1, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - completions[idx.prompt][idx.completion] += token; - }, - }), - }); - const res = await model.generate(["Print hello world", "print hello sea"]); - // console.log( - // res.generations, - // res.generations.map((g) => g[0].generationInfo) - // ); - - expect(nrNewTokens > 0).toBe(true); - expect(res.generations.length).toBe(2); - expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( - completions - ); -}); - -test("Test OpenAI prompt value", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - }); - const res = await model.generatePrompt([ - new StringPromptValue("Print hello world"), - ]); - expect(res.generations.length).toBe(1); - for (const generation of res.generations) { - expect(generation.length).toBe(1); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for (const g of generation) { - // console.log(g.text); - } - } - // console.log({ res }); -}); - -test("Test OpenAI stream method", async () => { - const model = new AzureOpenAI({ - maxTokens: 50, - model: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test OpenAI stream method with abort", async () => { - await expect(async () => { - const model = new AzureOpenAI({ - maxTokens: 250, - model: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose.", - { - signal: AbortSignal.timeout(1000), - } - ); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for await (const chunk of stream) { - // console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test OpenAI stream method with early break", async () => { - const model = new AzureOpenAI({ - maxTokens: 50, - model: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - let i = 0; - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - for await (const chunk of stream) { - // console.log(chunk); - i += 1; - if (i > 5) { - break; - } - } -}); - -test("Test OpenAI with Token credentials ", async () => { - const tenantId: string = getEnvironmentVariable("AZURE_TENANT_ID") ?? ""; - const clientId: string = getEnvironmentVariable("AZURE_CLIENT_ID") ?? ""; - const clientSecret: string = - getEnvironmentVariable("AZURE_CLIENT_SECRET") ?? ""; - - const credentials: TokenCredential = new ClientSecretCredential( - tenantId, - clientId, - clientSecret - ); - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "gpt-3.5-turbo-instruct", - credentials, - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world"); - // console.log({ res }); -}); - -test("Test Azure OpenAI with key credentials ", async () => { - const model = new AzureOpenAI({ - maxTokens: 5, - model: "davinci-002", - azureOpenAIApiKey: getEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "", - azureOpenAIEndpoint: - getEnvironmentVariable("AZURE_OPENAI_API_ENDPOINT") ?? "", - azureOpenAIApiDeploymentName: - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME") ?? "", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world"); - // console.log({ res }); -}); - -test("Test OpenAI with OpenAI API key credentials ", async () => { - const openAiKey: string = getEnvironmentVariable("OPENAI_API_KEY") ?? ""; - const credentials = new OpenAIKeyCredential(openAiKey); - - const model = new AzureOpenAI({ - maxTokens: 5, - model: "davinci-002", - credentials, - azureOpenAIEndpoint: "", - azureOpenAIApiDeploymentName: "", - }); - // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment - // @ts-expect-error unused var - const res = await model.invoke("Print hello world"); - // console.log({ res }); -}); diff --git a/libs/langchain-azure-openai/src/types.ts b/libs/langchain-azure-openai/src/types.ts deleted file mode 100644 index d00d9e692b1b..000000000000 --- a/libs/langchain-azure-openai/src/types.ts +++ /dev/null @@ -1,223 +0,0 @@ -import type { - OpenAIClientOptions, - AzureExtensionsOptions, - ChatRequestMessage, -} from "@azure/openai"; -import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; -import type { TiktokenModel } from "js-tiktoken/lite"; -import type { EmbeddingsParams } from "@langchain/core/embeddings"; -import type { KeyCredential, TokenCredential } from "@azure/core-auth"; - -// reexport this type from the included package so we can easily override and extend it if needed in the future -// also makes it easier for folks to import this type without digging around into the dependent packages -export type { TiktokenModel }; - -export declare interface AzureOpenAIInput { - openAIApiKey?: string; - - /** - * API key to use when making requests to Azure OpenAI. - * Alias for `apiKey` - */ - azureOpenAIApiKey?: string; - /** - * API key to use when making requests to Azure OpenAI. - */ - apiKey?: string; - - /** - * Endpoint to use when making requests to Azure OpenAI - */ - azureOpenAIEndpoint?: string; - - /** - * Azure OpenAI API deployment name to use for completions when making requests to Azure OpenAI. - * This is the name of the deployment you created in the Azure portal. - * e.g. "my-openai-deployment" - * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ - */ - azureOpenAIApiDeploymentName?: string; - - /** @deprecated Use "azureOpenAIApiDeploymentName" instead. */ - azureOpenAIEmbeddingsApiDeploymentName?: string; - - /** - * API version to use when making requests to Azure OpenAI. - */ - azureOpenAIApiVersion?: string; - - credentials?: KeyCredential | TokenCredential; -} - -export declare interface OpenAIBaseInput { - /** - * Maximum number of tokens to generate in the completion. -1 returns as many - * tokens as possible given the prompt and the model's maximum context size. - */ - maxTokens?: number; - - /** - * The sampling temperature to use that controls the apparent creativity of generated completions. - * Higher values will make output more random while lower values will make results more focused - * and deterministic. - * It is not recommended to modify temperature and top_p for the same completions request as the - * interaction of these two settings is difficult to predict. - */ - temperature: number; - - /** - * An alternative to sampling with temperature called nucleus sampling. This value causes the - * model to consider the results of tokens with the provided probability mass. As an example, a - * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - * considered. - * It is not recommended to modify temperature and top_p for the same completions request as the - * interaction of these two settings is difficult to predict. - */ - topP: number; - - /** - * A map between GPT token IDs and bias scores that influences the probability of specific tokens - * appearing in a completions response. Token IDs are computed via external tokenizer tools, while - * bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to - * a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias - * score varies by model. - */ - logitBias?: Record; - - /** - * An identifier for the caller or end user of the operation. This may be used for tracking - * or rate-limiting purposes. - */ - user?: string; - - /** - * The number of completions choices that should be generated per provided prompt as part of an - * overall completions response. - * Because this setting can generate many completions, it may quickly consume your token quota. - * Use carefully and ensure reasonable settings for max_tokens and stop. - */ - n: number; - - /** - * A value that influences the probability of generated tokens appearing based on their existing - * presence in generated text. - * Positive values will make tokens less likely to appear when they already exist and increase the - * model's likelihood to output new topics. - */ - presencePenalty: number; - - /** - * A value that influences the probability of generated tokens appearing based on their cumulative - * frequency in generated text. - * Positive values will make tokens less likely to appear as their frequency increases and - * decrease the likelihood of the model repeating the same statements verbatim. - */ - frequencyPenalty: number; - - /** A collection of textual sequences that will end completions generation. */ - stop?: string[]; - /** A collection of textual sequences that will end completions generation. */ - stopSequences?: string[]; - - /** Whether to stream the results or not. Enabling disables tokenUsage reporting */ - streaming: boolean; - - /** Model name to use */ - model?: string; - - /** Holds any additional parameters that are valid to pass to {@link - * https://platform.openai.com/docs/api-reference/completions/create | - * `openai.createCompletion`} that are not explicitly specified on this class. - */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - modelKwargs?: Record; - - /** - * Timeout to use when making requests to OpenAI. - */ - timeout?: number; -} - -export declare interface OpenAIInput extends OpenAIBaseInput { - /** - * A value that controls the emission of log probabilities for the provided number of most likely - * tokens within a completions response. - */ - logprobs?: number; - - /** - * A value specifying whether completions responses should include input prompts as prefixes to - * their generated output. - */ - echo?: boolean; - - /** - * A value that controls how many completions will be internally generated prior to response - * formulation. - * When used together with n, best_of controls the number of candidate completions and must be - * greater than n. - * Because this setting can generate many completions, it may quickly consume your token quota. - * Use carefully and ensure reasonable settings for max_tokens and stop. - */ - bestOf?: number; - - /** Batch size to use when passing multiple documents to generate */ - batchSize: number; -} - -export interface OpenAICallOptions extends BaseLanguageModelCallOptions { - /** - * Additional options to pass to the underlying axios request. - */ - options?: OpenAIClientOptions; -} - -export interface OpenAIChatInput extends OpenAIBaseInput { - /** ChatGPT messages to pass as a prefix to the prompt */ - prefixMessages?: ChatRequestMessage[]; - - azureExtensionOptions?: AzureExtensionsOptions; -} - -export interface OpenAIChatCallOptions extends OpenAICallOptions { - promptIndex?: number; -} - -export interface AzureOpenAIEmbeddingsParams extends EmbeddingsParams { - /** - * An identifier for the caller or end user of the operation. This may be used for tracking - * or rate-limiting purposes. - */ - user?: string; - /** - * The model name to provide as part of this embeddings request. - * Not applicable to Azure OpenAI, where deployment information should be included in the Azure - * resource URI that's connected to. - * Alias for `model` - */ - modelName?: string; - /** - * The model name to provide as part of this embeddings request. - * Not applicable to Azure OpenAI, where deployment information should be included in the Azure - * resource URI that's connected to. - */ - model?: string; - - /** - * The maximum number of documents to embed in a single request. This is - * limited by the OpenAI API to a maximum of 2048. - */ - batchSize?: number; - - /** - * Whether to strip new lines from the input text. This is recommended by - * OpenAI for older models, but may not be suitable for all use cases. - * See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500 - */ - stripNewLines?: boolean; - - /** - * Timeout to use when making requests to OpenAI. - */ - timeout?: number; -} diff --git a/libs/langchain-azure-openai/src/utils/openai-format-fndef.ts b/libs/langchain-azure-openai/src/utils/openai-format-fndef.ts deleted file mode 100644 index 4dcf11dc00db..000000000000 --- a/libs/langchain-azure-openai/src/utils/openai-format-fndef.ts +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Formatting function definitions for calculating openai function defination token usage. - * - * https://github.com/hmarr/openai-chat-tokens/blob/main/src/functions.ts - * (c) 2023 Harry Marr - * MIT license - */ - -import { FunctionDefinition } from "@azure/openai"; - -export interface FunctionDef extends Omit { - name: string; - description?: string; - parameters: ObjectProp; -} - -interface ObjectProp { - type: "object"; - properties?: { - [key: string]: Prop; - }; - required?: string[]; -} - -interface AnyOfProp { - anyOf: Prop[]; -} - -type Prop = { - description?: string; -} & ( - | AnyOfProp - | ObjectProp - | { - type: "string"; - enum?: string[]; - } - | { - type: "number" | "integer"; - minimum?: number; - maximum?: number; - enum?: number[]; - } - | { type: "boolean" } - | { type: "null" } - | { - type: "array"; - items?: Prop; - } -); - -function isAnyOfProp(prop: Prop): prop is AnyOfProp { - return ( - (prop as AnyOfProp).anyOf !== undefined && - Array.isArray((prop as AnyOfProp).anyOf) - ); -} - -// When OpenAI use functions in the prompt, they format them as TypeScript definitions rather than OpenAPI JSON schemas. -// This function converts the JSON schemas into TypeScript definitions. -export function formatFunctionDefinitions(functions: FunctionDef[]) { - const lines = ["namespace functions {", ""]; - for (const f of functions) { - if (f.description) { - lines.push(`// ${f.description}`); - } - if (Object.keys(f.parameters.properties ?? {}).length > 0) { - lines.push(`type ${f.name} = (_: {`); - lines.push(formatObjectProperties(f.parameters, 0)); - lines.push("}) => any;"); - } else { - lines.push(`type ${f.name} = () => any;`); - } - lines.push(""); - } - lines.push("} // namespace functions"); - return lines.join("\n"); -} - -// Format just the properties of an object (not including the surrounding braces) -function formatObjectProperties(obj: ObjectProp, indent: number): string { - const lines: string[] = []; - for (const [name, param] of Object.entries(obj.properties ?? {})) { - if (param.description && indent < 2) { - lines.push(`// ${param.description}`); - } - if (obj.required?.includes(name)) { - lines.push(`${name}: ${formatType(param, indent)},`); - } else { - lines.push(`${name}?: ${formatType(param, indent)},`); - } - } - return lines.map((line) => " ".repeat(indent) + line).join("\n"); -} - -// Format a single property type -function formatType(param: Prop, indent: number): string { - if (isAnyOfProp(param)) { - return param.anyOf.map((v) => formatType(v, indent)).join(" | "); - } - switch (param.type) { - case "string": - if (param.enum) { - return param.enum.map((v) => `"${v}"`).join(" | "); - } - return "string"; - case "number": - if (param.enum) { - return param.enum.map((v) => `${v}`).join(" | "); - } - return "number"; - case "integer": - if (param.enum) { - return param.enum.map((v) => `${v}`).join(" | "); - } - return "number"; - case "boolean": - return "boolean"; - case "null": - return "null"; - case "object": - return ["{", formatObjectProperties(param, indent + 2), "}"].join("\n"); - case "array": - if (param.items) { - return `${formatType(param.items, indent)}[]`; - } - return "any[]"; - default: - return ""; - } -} diff --git a/libs/langchain-azure-openai/tsconfig.cjs.json b/libs/langchain-azure-openai/tsconfig.cjs.json deleted file mode 100644 index a4e3af045bbd..000000000000 --- a/libs/langchain-azure-openai/tsconfig.cjs.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "extends": "./tsconfig.json", - "compilerOptions": { - "module": "commonjs", - "moduleResolution": "node10", - "declaration": false - }, - "exclude": [ - "node_modules", - "dist", - "docs", - "**/tests" - ] -} \ No newline at end of file diff --git a/libs/langchain-azure-openai/tsconfig.json b/libs/langchain-azure-openai/tsconfig.json deleted file mode 100644 index e76b4273f9c3..000000000000 --- a/libs/langchain-azure-openai/tsconfig.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "extends": "@tsconfig/recommended", - "compilerOptions": { - "outDir": "../dist", - "rootDir": "./src", - "target": "ES2021", - "lib": [ - "ES2021", - "ES2022.Object", - "DOM" - ], - "module": "ES2020", - "moduleResolution": "bundler", - "esModuleInterop": true, - "declaration": true, - "noImplicitReturns": true, - "noFallthroughCasesInSwitch": true, - "noUnusedLocals": true, - "noUnusedParameters": true, - "useDefineForClassFields": true, - "strictPropertyInitialization": false, - "allowJs": true, - "strict": true - }, - "include": [ - "src/**/*" - ], - "exclude": [ - "node_modules", - "dist", - "docs" - ] -} diff --git a/libs/langchain-azure-openai/turbo.json b/libs/langchain-azure-openai/turbo.json deleted file mode 100644 index d1bb60a7bb1c..000000000000 --- a/libs/langchain-azure-openai/turbo.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "extends": ["//"], - "tasks": { - "build": { - "outputs": ["**/dist/**"] - }, - "build:internal": { - "dependsOn": ["^build:internal"] - } - } -}