diff --git a/.github/workflows/build-test.yaml b/.github/workflows/build-test.yaml index 59121684..c3666562 100644 --- a/.github/workflows/build-test.yaml +++ b/.github/workflows/build-test.yaml @@ -19,7 +19,7 @@ jobs: name: ${{ matrix.platform }} / Node.js v${{ matrix.node-version }} runs-on: ${{ matrix.platform }} steps: - - run: git config --global core.autocrlf false # Preserve line endings + # - run: git config --global core.autocrlf false # Preserve line endings - uses: actions/checkout@v4 - name: Setup Node.js v${{ matrix.node-version }} uses: actions/setup-node@v4 diff --git a/README.md b/README.md index 21676862..f071e8a8 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ - [Resources](#resources) - [FAQ](#faq) - [Troubleshooting](#troubleshooting) + - [Reduce deployment costs](#reduce-deployment-costs) - [Note](#note) [![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=684521881&machine=standardLinux32gb&devcontainer_path=.devcontainer%2Fdevcontainer.json&location=WestUs2) @@ -426,6 +427,10 @@ Here are the most common failure scenarios and solutions: 1. You're getting an error `401 Principal does not have access to API/Operation` while running the project locally or trying to deploy. That's likely because your environment variables include `AZURE_TENANT_ID`, `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET`. You should either grant permissions to the related _Service Principal_ or remove these variables from your environment to ensure normal access. For more details, please refer to [Azure identity SDK](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/identity/identity/README.md#defaultazurecredential). +### Reduce deployment costs + +See [this doc for reducing costs](./docs/low-cost.md). 
+ ### Note > Note: The documents used in this demo contain information generated using a language model (Azure OpenAI Service). The information contained in these documents is only for demonstration purposes and does not reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, about the completeness, accuracy, reliability, suitability or availability with respect to the information contained in this document. All rights reserved to Microsoft. diff --git a/docs/low-cost.md b/docs/low-cost.md new file mode 100644 index 00000000..40a45e5d --- /dev/null +++ b/docs/low-cost.md @@ -0,0 +1,31 @@ +# Reduce deployment costs + +This AI RAG chat application is designed to be easily deployed using the Azure Developer CLI, which provisions the infrastructure according to the Bicep files in the `infra` folder. Those files describe each of the Azure resources needed, and configure their SKU (pricing tier) and other parameters. Many Azure services offer a free tier, but the infrastructure files in this project do *not* default to the free tier as there are often limitations in that tier. + +However, if your goal is to minimize costs while prototyping your application, follow the steps below *before* running `azd up`. Once you've gone through these steps, return to the [deployment steps](../README.md#azure-deployment). + +1. Log in to your Azure account using the Azure Developer CLI: + + ```shell + azd auth login + ``` + +1. Create a new azd environment for the free resource group: + + ```shell + azd env new + ``` + + Enter a name that will be used for the resource group. + This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward. + +1. Use the free tier of Azure AI Search: + + ```shell + azd env set AZURE_SEARCH_SERVICE_SKU free + ``` + + Limitations: + 1. You are only allowed one free search service across all regions. + 2. 
The free tier does not support semantic ranker. Note that this will generally result in [decreased search relevance](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/azure-ai-search-outperforming-vector-search-with-hybrid-retrieval-and-ranking-ca/3929167). + diff --git a/infra/main.bicep b/infra/main.bicep index 26a4b33f..f9e21bc1 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -358,7 +358,7 @@ module searchService 'core/search/search-services.bicep' = { sku: { name: searchServiceSkuName } - semanticSearch: 'free' + semanticSearch: searchServiceSkuName == 'standard' ? 'free' : 'disabled' } } @@ -526,6 +526,7 @@ output AZURE_OPENAI_EMBEDDING_MODEL string = embeddingModelName output AZURE_SEARCH_INDEX string = searchIndexName output AZURE_SEARCH_SERVICE string = searchService.outputs.name output AZURE_SEARCH_SERVICE_RESOURCE_GROUP string = searchServiceResourceGroup.name +output AZURE_SEARCH_SEMANTIC_RANKER string = searchServiceSkuName == 'standard' ? 'enabled' : 'disabled' output AZURE_STORAGE_ACCOUNT string = storage.outputs.name output AZURE_STORAGE_CONTAINER string = storageContainerName diff --git a/package-lock.json b/package-lock.json index 09a32b36..bf043f57 100644 --- a/package-lock.json +++ b/package-lock.json @@ -160,17 +160,29 @@ } }, "node_modules/@azure/core-http-compat": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-1.3.0.tgz", - "integrity": "sha512-ZN9avruqbQ5TxopzG3ih3KRy52n8OAbitX3fnZT5go4hzu0J+KVPSzkL+Wt3hpJpdG8WIfg1sBD1tWkgUdEpBA==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-2.2.0.tgz", + "integrity": "sha512-1kW8ZhN0CfbNOG6C688z5uh2yrzALE7dDXHiR9dY4vt+EbhGZQSbjDa5bQd2rf3X2pdWMsXbqbArxUyeNdvtmg==", "license": "MIT", "dependencies": { - "@azure/abort-controller": "^1.0.4", + "@azure/abort-controller": "^2.0.0", "@azure/core-client": "^1.3.0", - "@azure/core-rest-pipeline": "^1.3.0" + 
"@azure/core-rest-pipeline": "^1.19.0" }, "engines": { - "node": ">=12.0.0" + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-http-compat/node_modules/@azure/abort-controller": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz", + "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" } }, "node_modules/@azure/core-lro": { @@ -504,23 +516,24 @@ } }, "node_modules/@azure/search-documents": { - "version": "12.0.0-beta.3", - "resolved": "https://registry.npmjs.org/@azure/search-documents/-/search-documents-12.0.0-beta.3.tgz", - "integrity": "sha512-if8NmeRtg1sU8MdzPKWPSVi77NhY4ZeInvg8zYXIrPIUzNtEy3LRYYJonH/MyImGJkQqn76vxHjjpHENycNuxA==", + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/@azure/search-documents/-/search-documents-12.1.0.tgz", + "integrity": "sha512-IzD+hfqGqFtXymHXm4RzrZW2MsSH2M7RLmZsKaKVi7SUxbeYTUeX+ALk8gVzkM8ykb7EzlDLWCNErKfAa57rYQ==", "license": "MIT", "dependencies": { "@azure/core-auth": "^1.3.0", "@azure/core-client": "^1.3.0", - "@azure/core-http-compat": "^1.2.0", + "@azure/core-http-compat": "^2.0.1", "@azure/core-paging": "^1.1.1", "@azure/core-rest-pipeline": "^1.3.0", "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.0.0", "@azure/logger": "^1.0.0", "events": "^3.0.0", "tslib": "^2.2.0" }, "engines": { - "node": ">=14.0.0" + "node": ">=18.0.0" } }, "node_modules/@azure/search-documents/node_modules/@azure/core-tracing": { @@ -571,20 +584,6 @@ "node": ">=18.0.0" } }, - "node_modules/@azure/storage-blob/node_modules/@azure/core-http-compat": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-2.2.0.tgz", - "integrity": "sha512-1kW8ZhN0CfbNOG6C688z5uh2yrzALE7dDXHiR9dY4vt+EbhGZQSbjDa5bQd2rf3X2pdWMsXbqbArxUyeNdvtmg==", - "license": 
"MIT", - "dependencies": { - "@azure/abort-controller": "^2.0.0", - "@azure/core-client": "^1.3.0", - "@azure/core-rest-pipeline": "^1.19.0" - }, - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/@azure/storage-blob/node_modules/@azure/core-tracing": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.2.0.tgz", @@ -13608,7 +13607,7 @@ "@azure/core-http": "^3.0.5", "@azure/identity": "^4.0.1", "@azure/monitor-opentelemetry": "^1.0.0-beta.2", - "@azure/search-documents": "12.0.0-beta.3", + "@azure/search-documents": "^12.1.0", "@azure/storage-blob": "^12.15.0", "@dqbd/tiktoken": "^1.0.7", "@fastify/autoload": "^6.1.0", @@ -13657,7 +13656,7 @@ "@azure/core-http": "^3.0.5", "@azure/identity": "^4.0.1", "@azure/monitor-opentelemetry": "^1.0.0-beta.2", - "@azure/search-documents": "12.0.0-beta.3", + "@azure/search-documents": "^12.1.0", "@azure/storage-blob": "^12.15.0", "@dqbd/tiktoken": "^1.0.7", "@fastify/autoload": "^6.1.0", diff --git a/packages/indexer/package.json b/packages/indexer/package.json index 5e1acc43..9235d029 100644 --- a/packages/indexer/package.json +++ b/packages/indexer/package.json @@ -26,7 +26,7 @@ "@azure/core-http": "^3.0.5", "@azure/identity": "^4.0.1", "@azure/monitor-opentelemetry": "^1.0.0-beta.2", - "@azure/search-documents": "12.0.0-beta.3", + "@azure/search-documents": "^12.1.0", "@azure/storage-blob": "^12.15.0", "@dqbd/tiktoken": "^1.0.7", "@fastify/autoload": "^6.1.0", diff --git a/packages/indexer/src/lib/indexer.ts b/packages/indexer/src/lib/indexer.ts index 41e9779a..46994806 100644 --- a/packages/indexer/src/lib/indexer.ts +++ b/packages/indexer/src/lib/indexer.ts @@ -37,7 +37,7 @@ export class Indexer { this.blobStorage = new BlobStorage(logger, azure); } - async createSearchIndex(indexName: string) { + async createSearchIndex(indexName: string, useSemanticRanker = false) { this.logger.debug(`Ensuring search index "${indexName}" exists`); const searchIndexClient = 
this.azure.searchIndex; @@ -52,6 +52,45 @@ export class Indexer { } else { const index: SearchIndex = { name: indexName, + vectorSearch: { + algorithms: [ + { + name: 'vector-search-algorithm', + kind: 'hnsw', + parameters: { + m: 4, + efSearch: 500, + metric: 'cosine', + efConstruction: 400, + }, + }, + ], + profiles: [ + { + name: 'vector-search-profile', + algorithmConfigurationName: 'vector-search-algorithm', + }, + ], + }, + ...(useSemanticRanker + ? { + semanticSearch: { + defaultConfigurationName: 'semantic-search-config', + configurations: [ + { + name: 'semantic-search-config', + prioritizedFields: { + contentFields: [ + { + name: 'content', + }, + ], + }, + }, + ], + }, + } + : {}), fields: [ { name: 'id', @@ -73,7 +112,7 @@ export class Indexer { sortable: false, facetable: false, vectorSearchDimensions: 1536, - vectorSearchConfiguration: 'default', + vectorSearchProfileName: 'vector-search-profile', }, { name: 'category', @@ -94,27 +133,6 @@ export class Indexer { facetable: true, }, ], - semanticSettings: { - configurations: [ - { - name: 'default', - prioritizedFields: { - prioritizedContentFields: [{ name: 'content' }], - }, - }, - ], - }, - vectorSearch: { - algorithmConfigurations: [ - { - name: 'default', - kind: 'hnsw', - parameters: { - metric: 'cosine', - }, - }, - ], - }, }; this.logger.debug(`Creating "${indexName}" search index...`); await searchIndexClient.createIndex(index); diff --git a/packages/indexer/src/plugins/config.ts b/packages/indexer/src/plugins/config.ts index 03969aa6..fa275a75 100644 --- a/packages/indexer/src/plugins/config.ts +++ b/packages/indexer/src/plugins/config.ts @@ -8,6 +8,7 @@ export interface AppConfig { azureStorageContainer: string; azureSearchService: string; azureSearchIndex: string; + azureSearchSemanticRanker: string; azureOpenAiService: string; azureOpenAiEmbeddingDeployment: string; azureOpenAiEmbeddingModel: string; @@ -28,6 +29,7 @@ export default fp( azureStorageAccount: 
process.env.AZURE_STORAGE_ACCOUNT || '', azureStorageContainer: process.env.AZURE_STORAGE_CONTAINER || '', azureSearchService: process.env.AZURE_SEARCH_SERVICE || '', + azureSearchSemanticRanker: process.env.AZURE_SEARCH_SEMANTIC_RANKER || 'disabled', azureSearchIndex: process.env.AZURE_SEARCH_INDEX || '', azureOpenAiService: process.env.AZURE_OPENAI_SERVICE || '', azureOpenAiEmbeddingDeployment: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT || '', diff --git a/packages/indexer/src/routes/indexes/index.ts b/packages/indexer/src/routes/indexes/index.ts index ef444d1c..49c7febd 100644 --- a/packages/indexer/src/routes/indexes/index.ts +++ b/packages/indexer/src/routes/indexes/index.ts @@ -33,7 +33,7 @@ const root: FastifyPluginAsyncJsonSchemaToTs = async (fastify, _options): Promis handler: async function (request, reply) { const { name } = request.body; try { - await fastify.indexer.createSearchIndex(name); + await fastify.indexer.createSearchIndex(name, fastify.config.azureSearchSemanticRanker === 'enabled'); reply.code(204); } catch (_error: unknown) { const error = _error as Error; diff --git a/packages/indexer/test/helper.ts b/packages/indexer/test/helper.ts index d0d55cdc..a0fa143b 100644 --- a/packages/indexer/test/helper.ts +++ b/packages/indexer/test/helper.ts @@ -24,6 +24,7 @@ async function config() { process.env.AZURE_SEARCH_SERVICE = 'https://example.com'; process.env.AZURE_STORAGE_ACCOUNT = 'dummystorage'; process.env.AZURE_STORAGE_CONTAINER = 'testfiles'; + return {}; } diff --git a/packages/search/package.json b/packages/search/package.json index 6cccd894..5a524e25 100644 --- a/packages/search/package.json +++ b/packages/search/package.json @@ -23,7 +23,7 @@ "@azure/core-http": "^3.0.5", "@azure/identity": "^4.0.1", "@azure/monitor-opentelemetry": "^1.0.0-beta.2", - "@azure/search-documents": "12.0.0-beta.3", + "@azure/search-documents": "^12.1.0", "@azure/storage-blob": "^12.15.0", "@dqbd/tiktoken": "^1.0.7", "@fastify/autoload": "^6.1.0", diff --git 
a/packages/search/src/lib/approaches/approach-base.ts b/packages/search/src/lib/approaches/approach-base.ts index 4dcc21ed..757a4fb5 100644 --- a/packages/search/src/lib/approaches/approach-base.ts +++ b/packages/search/src/lib/approaches/approach-base.ts @@ -43,29 +43,40 @@ export class ApproachBase { ? this.search.search(queryText, { filter, queryType: 'semantic', - queryLanguage: 'en-us', - speller: 'lexicon', - semanticConfiguration: 'default', + semanticSearchOptions: { + configurationName: 'semantic-search-config', + captions: useSemanticCaption + ? { + captionType: 'extractive', + highlight: false, + } + : undefined, + }, top, - captions: useSemanticCaption ? 'extractive|highlight-false' : undefined, - vectors: [ - { - value: queryVector, - kNearestNeighborsCount: queryVector ? 50 : undefined, - fields: queryVector ? ['embedding'] : undefined, - }, - ], + vectorSearchOptions: { + queries: [ + { + kind: 'vector', + vector: queryVector!, + kNearestNeighborsCount: queryVector ? 50 : undefined, + fields: queryVector ? ['embedding'] : undefined, + }, + ], + }, }) : this.search.search(queryText, { filter, top, - vectors: [ - { - value: queryVector, - kNearestNeighborsCount: queryVector ? 50 : undefined, - fields: queryVector ? ['embedding'] : undefined, - }, - ], + vectorSearchOptions: { + queries: [ + { + kind: 'vector', + vector: queryVector!, + kNearestNeighborsCount: queryVector ? 50 : undefined, + fields: queryVector ? ['embedding'] : undefined, + }, + ], + }, })); const results: string[] = []; @@ -87,32 +98,4 @@ export class ApproachBase { const content = results.join('\n'); return { query: queryText ?? 
'', results, content }; } - - protected async lookupDocument(query: string): Promise { - const searchResults = await this.search.search(query, { - top: 1, - includeTotalCount: true, - queryType: 'semantic', - queryLanguage: 'en-us', - speller: 'lexicon', - semanticConfiguration: 'default', - answers: 'extractive|count-1', - captions: 'extractive|highlight-false', - }); - - const answers = await searchResults.answers; - if (answers && answers.length > 0) { - return answers[0].text; - } - if (searchResults.count ?? 0 > 0) { - const results: string[] = []; - for await (const result of searchResults.results) { - // TODO: ensure typings - const document = result.document as any; - results.push(document[this.contentField]); - } - return results.join('\n'); - } - return undefined; - } } diff --git a/packages/search/src/plugins/config.ts b/packages/search/src/plugins/config.ts index 23f68190..4c55718d 100644 --- a/packages/search/src/plugins/config.ts +++ b/packages/search/src/plugins/config.ts @@ -8,6 +8,7 @@ export interface AppConfig { azureStorageContainer: string; azureSearchService: string; azureSearchIndex: string; + azureSearchSemanticRanker: string; azureOpenAiService: string; azureOpenAiChatGptDeployment: string; azureOpenAiChatGptModel: string; @@ -32,6 +33,7 @@ export default fp( azureStorageContainer: process.env.AZURE_STORAGE_CONTAINER || '', azureSearchService: process.env.AZURE_SEARCH_SERVICE || '', azureSearchIndex: process.env.AZURE_SEARCH_INDEX || '', + azureSearchSemanticRanker: process.env.AZURE_SEARCH_SEMANTIC_RANKER || 'disabled', azureOpenAiService: process.env.AZURE_OPENAI_SERVICE || '', azureOpenAiChatGptDeployment: process.env.AZURE_OPENAI_CHATGPT_DEPLOYMENT || '', azureOpenAiChatGptModel: process.env.AZURE_OPENAI_CHATGPT_MODEL || 'gpt-4o-mini', diff --git a/packages/search/src/routes/root.ts b/packages/search/src/routes/root.ts index 40810a74..a21ea0e0 100644 --- a/packages/search/src/routes/root.ts +++ b/packages/search/src/routes/root.ts @@ 
-5,6 +5,7 @@ import { Readable } from 'node:stream'; import { type FastifyPluginAsync } from 'fastify'; import { type JsonSchemaToTsProvider } from '@fastify/type-provider-json-schema-to-ts'; import { type SchemaTypes } from '../plugins/schemas.js'; +import { type ApproachContext } from '../lib/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -74,6 +75,11 @@ const root: FastifyPluginAsync = async (_fastify, _options): Promise => { } const { messages, context, stream } = request.body; + let approachContext: ApproachContext = (context as any) ?? {}; + if (this.config.azureSearchSemanticRanker !== 'enabled') { + approachContext = { ...approachContext, semantic_ranker: false }; + } + try { if (stream) { const buffer = new Readable(); @@ -81,14 +87,14 @@ const root: FastifyPluginAsync = async (_fastify, _options): Promise => { buffer._read = () => {}; reply.type('application/x-ndjson').send(buffer); - const chunks = await chatApproach.runWithStreaming(messages, (context as any) ?? {}); + const chunks = await chatApproach.runWithStreaming(messages, approachContext); for await (const chunk of chunks) { buffer.push(JSON.stringify(chunk) + '\n'); } // eslint-disable-next-line unicorn/no-null buffer.push(null); } else { - return await chatApproach.run(messages, (context as any) ?? {}); + return await chatApproach.run(messages, approachContext); } } catch (_error: unknown) { const error = _error as Error & { error?: any; status?: number }; @@ -121,6 +127,11 @@ const root: FastifyPluginAsync = async (_fastify, _options): Promise => { } const { messages, context, stream } = request.body; + let approachContext: ApproachContext = (context as any) ?? 
{}; + if (this.config.azureSearchSemanticRanker !== 'enabled') { + approachContext = { ...approachContext, semantic_ranker: false }; + } + try { if (stream) { const buffer = new Readable(); @@ -128,14 +139,14 @@ const root: FastifyPluginAsync = async (_fastify, _options): Promise => { buffer._read = () => {}; reply.type('application/x-ndjson').send(buffer); - const chunks = await askApproach.runWithStreaming(messages[0].content, (context as any) ?? {}); + const chunks = await askApproach.runWithStreaming(messages[0].content, approachContext); for await (const chunk of chunks) { buffer.push(JSON.stringify(chunk) + '\n'); } // eslint-disable-next-line unicorn/no-null buffer.push(null); } else { - return await askApproach.run(messages[0].content, (context as any) ?? {}); + return await askApproach.run(messages[0].content, approachContext); } } catch (_error: unknown) { const error = _error as Error & { error?: any; status?: number }; diff --git a/packages/search/test.http b/packages/search/test.http index 890afa27..7ae17cbd 100644 --- a/packages/search/test.http +++ b/packages/search/test.http @@ -71,22 +71,3 @@ Content-Type: application/json } } -### - -# Ask a question using the rrr approach -POST {{api_host}}/ask -Content-Type: application/json - -{ - "messages": [{ - "content": "How to contact a representative?", - "role": "user" - }], - "context": { - "approach":"rrr", - "retrieval_mode": "hybrid", - "semantic_ranker": true, - "semantic_captions": false, - "top": 3 - } -}