diff --git a/notebooks/field_extraction.ipynb b/notebooks/field_extraction.ipynb index e13e747..9787137 100644 --- a/notebooks/field_extraction.ipynb +++ b/notebooks/field_extraction.ipynb @@ -13,7 +13,7 @@ "source": [ "This notebook demonstrates how to use analyzers to extract custom fields from your input files.\n", "\n", - "Content Understanding provides **extensive prebuilt analyzers** ready to use without training. Always start with prebuilt analyzers before building custom solutions." + "Azure AI Content Understanding provides **extensive prebuilt analyzers** ready to use without training. We recommend always starting with prebuilt analyzers before building custom solutions." ] }, { @@ -21,8 +21,8 @@ "metadata": {}, "source": [ "## Prerequisites\n", - "1. Ensure Azure AI service is configured following [steps](../README.md#configure-azure-ai-service-resource)\n", - "2. Install the required packages to run the sample." + "1. Ensure your Azure AI service is configured by following the [configuration steps](../README.md#configure-azure-ai-service-resource).\n", + "2. Install the required packages to run this sample." ] }, { @@ -40,14 +40,16 @@ "source": [ "## Create Azure AI Content Understanding Client\n", "\n", - "> The [AzureContentUnderstandingClient](../python/content_understanding_client.py) is a utility class containing functions to interact with the Content Understanding API. Before the official release of the Content Understanding SDK, it can be regarded as a lightweight SDK. Fill the constant **AZURE_AI_ENDPOINT**, **AZURE_AI_API_VERSION**, **AZURE_AI_API_KEY** with the information from your Azure AI Service.\n", + "> The [AzureContentUnderstandingClient](../python/content_understanding_client.py) is a utility class containing functions to interact with the Content Understanding API. 
It serves as a lightweight SDK before the official release.\n", + "\n", + "> **Please fill in the constants**: **AZURE_AI_ENDPOINT**, **AZURE_AI_API_VERSION**, and **AZURE_AI_API_KEY** with the information from your Azure AI service.\n", "\n", "> ⚠️ Important:\n", "You must update the code below to match your Azure authentication method.\n", "Look for the `# IMPORTANT` comments and modify those sections accordingly.\n", - "If you skip this step, the sample may not run correctly.\n", + "If you skip this, the sample may not run correctly.\n", "\n", - "> ⚠️ Note: Using a subscription key works, but using a token provider with Azure Active Directory (AAD) is much safer and is highly recommended for production environments." + "> ⚠️ Note: While using a subscription key works, we highly recommend using a token provider with Azure Active Directory (AAD) for greater security in production environments." ] }, { @@ -123,20 +125,21 @@ "source": [ "## Configure Model Deployments for Prebuilt Analyzers\n", "\n", - "> **💡 Note:** This step is only required **once per Azure Content Understanding resource**, unless the GPT deployment has been changed. You can skip this section if:\n", - "> - This configuration has already been run once for your resource, or\n", - "> - Your administrator has already configured the model deployments for you\n", + "> **💡 Note:** This step is required **once per Azure Content Understanding resource**, unless the GPT deployment has changed.\n", + "> You can skip this if:\n", + "> - This configuration has already been completed for your resource, or\n", + "> - Your administrator has already configured the model deployments for you.\n", "\n", - "Before using prebuilt analyzers, you need to configure the default model deployment mappings. This tells Content Understanding which model deployments to use.\n", + "Before using prebuilt analyzers, you need to configure default model deployment mappings. 
This tells Content Understanding which model deployments to use.\n", "\n", "**Model Requirements:**\n", "- **GPT-4.1** - Required for most prebuilt analyzers (e.g., `prebuilt-invoice`, `prebuilt-receipt`, `prebuilt-idDocument`)\n", - "- **GPT-4.1-mini** - Required for RAG analyzers (e.g., `prebuilt-documentSearch`, `prebuilt-audioSearch`, `prebuilt-videoSearch`)\n", - "- **text-embedding-3-large** - Required for all prebuilt analyzers that use embeddings\n", + "- **GPT-4.1-mini** - Required for retrieval-augmented generation (RAG) analyzers (e.g., `prebuilt-documentSearch`, `prebuilt-audioSearch`, `prebuilt-videoSearch`)\n", + "- **text-embedding-3-large** - Required for embedding-based prebuilt analyzers\n", "\n", "**Prerequisites:**\n", - "1. Deploy **GPT-4.1**, **GPT-4.1-mini**, and **text-embedding-3-large** models in Azure AI Foundry\n", - "2. Set `GPT_4_1_DEPLOYMENT`, `GPT_4_1_MINI_DEPLOYMENT`, and `TEXT_EMBEDDING_3_LARGE_DEPLOYMENT` in your `.env` file with the deployment names\n" + "1. Deploy **GPT-4.1**, **GPT-4.1-mini**, and **text-embedding-3-large** models in Azure AI Foundry.\n", + "2. Set the environment variables `GPT_4_1_DEPLOYMENT`, `GPT_4_1_MINI_DEPLOYMENT`, and `TEXT_EMBEDDING_3_LARGE_DEPLOYMENT` in your `.env` file with the corresponding deployment names." ] }, { @@ -164,13 +167,13 @@ " for deployment in missing_deployments:\n", " print(f\" - {deployment}\")\n", " print(\"\\n Prebuilt analyzers require GPT-4.1, GPT-4.1-mini, and text-embedding-3-large deployments.\")\n", - " print(\" Please:\")\n", - " print(\" 1. Deploy all three models in Azure AI Foundry\")\n", + " print(\" Please ensure you do the following:\")\n", + " print(\" 1. Deploy all three models in Azure AI Foundry.\")\n", " print(\" 2. Add the following to notebooks/.env:\")\n", " print(\" GPT_4_1_DEPLOYMENT=\")\n", " print(\" GPT_4_1_MINI_DEPLOYMENT=\")\n", " print(\" TEXT_EMBEDDING_3_LARGE_DEPLOYMENT=\")\n", - " print(\" 3. 
Restart the kernel and run this cell again\")\n", + " print(\" 3. Restart the kernel and run this cell again.\")\n", "else:\n", " print(f\"📋 Configuring default model deployments...\")\n", " print(f\" GPT-4.1 deployment: {GPT_4_1_DEPLOYMENT}\")\n", @@ -191,9 +194,9 @@ " print(f\" {model} → {deployment}\")\n", " except Exception as e:\n", " print(f\"❌ Failed to configure defaults: {e}\")\n", - " print(f\" This may happen if:\")\n", - " print(f\" - One or more deployment names don't exist in your Azure AI Foundry project\")\n", - " print(f\" - You don't have permission to update defaults\")\n", + " print(f\" Possible reasons include:\")\n", + " print(f\" - One or more deployment names don't exist in your Azure AI Foundry project.\")\n", + " print(f\" - You lack permission to update defaults.\")\n", " raise\n" ] }, @@ -205,21 +208,21 @@ "\n", "## Why Start with Prebuilt Analyzers?\n", "\n", - "Azure AI Content Understanding provides **70+ production-ready prebuilt analyzers** that cover common scenarios across finance, healthcare, legal, tax, and business domains. These analyzers are:\n", + "Azure AI Content Understanding provides **70+ production-ready prebuilt analyzers** covering common scenarios across finance, healthcare, legal, tax, and business domains. 
These analyzers are:\n", "\n", - "- **Immediately Available** - No training, configuration, or customization needed \n", - "- **Battle-Tested** - Built on rich knowledge bases of thousands of real-world document examples \n", - "- **Continuously Improved** - Regularly updated by Microsoft to handle document variations \n", - "- **Cost-Effective** - Save development time and resources by using proven solutions \n", - "- **Comprehensive Coverage** - Extensive support for Financial documents (invoices, receipts, bank statements, credit cards), Identity documents (passports, driver licenses, ID cards, health insurance), Tax documents (40+ US tax forms including 1040, W-2, 1099 variants), Mortgage documents (applications, appraisals, disclosures), Business documents (contracts, purchase orders, procurement), and many more specialized scenarios\n", + "- **Immediately Available** — No training, configuration, or customization needed.\n", + "- **Battle-Tested** — Built on extensive knowledge bases of thousands of real-world document examples.\n", + "- **Continuously Improved** — Regularly updated by Microsoft to handle document variations.\n", + "- **Cost-Effective** — Save development time and resources by using proven solutions.\n", + "- **Comprehensive Coverage** — Support for financial documents (invoices, receipts, bank statements, credit cards), identity documents (passports, driver licenses, ID cards, health insurance), tax documents (40+ US tax forms including 1040, W-2, 1099 variants), mortgage documents (applications, appraisals, disclosures), business documents (contracts, purchase orders, procurement), and many other specialized scenarios.\n", "\n", - "> **Best Practice**: Always explore prebuilt analyzers first. Build custom analyzers only when prebuilt options don't meet your specific requirements.\n", + "> **Best Practice:** Always explore prebuilt analyzers first. 
Build custom analyzers only when prebuilt options don't meet your specific requirements.\n", "\n", "### Complete List of Prebuilt Analyzer Categories\n", "\n", "**Content Extraction & RAG**\n", - "- `prebuilt-read`, `prebuilt-layout` - OCR and layout analysis\n", - "- `prebuilt-documentSearch`, `prebuilt-imageSearch`, `prebuilt-audioSearch`, `prebuilt-videoSearch` - RAG-optimized\n", + "- `prebuilt-read`, `prebuilt-layout` — OCR and layout analysis\n", + "- `prebuilt-documentSearch`, `prebuilt-imageSearch`, `prebuilt-audioSearch`, `prebuilt-videoSearch` — Retrieval-augmented generation (RAG) optimized\n", "\n", "**Financial Documents**\n", "- `prebuilt-invoice`, `prebuilt-receipt`, `prebuilt-creditCard`, `prebuilt-bankStatement.us`, `prebuilt-check.us`, `prebuilt-creditMemo`\n", @@ -239,7 +242,7 @@ "**Other Specialized**\n", "- `prebuilt-utilityBill`, `prebuilt-payStub.us`, and more\n", "\n", - "> **Learn More**: [Complete Prebuilt Analyzers Documentation](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/prebuilt-analyzers)" + "> **Learn More:** [Complete Prebuilt Analyzers Documentation](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/prebuilt-analyzers)" ] }, { @@ -248,14 +251,14 @@ "source": [ "## Build Custom Analyzers (When Needed)\n", "\n", - "Create custom analyzers only when prebuilt ones don't meet your needs:\n", + "Create custom analyzers only when prebuilt ones don't meet your specific needs:\n", "- Extract fields specific to your business\n", "- Process proprietary document types\n", "- Customize extraction logic for unique requirements\n", "\n", "**This notebook demonstrates both approaches:**\n", - "1. **Part 1**: Using prebuilt analyzers (receipts, invoices)\n", - "2. **Part 2**: Creating custom analyzers when prebuilt options aren't sufficient" + "1. **Part 1:** Using prebuilt analyzers (receipts, invoices)\n", + "2. 
**Part 2:** Creating custom analyzers when prebuilt options aren't sufficient" ] }, { @@ -264,7 +267,7 @@ "source": [ "## 1. Invoice Field Extraction with Prebuilt Analyzer\n", "\n", - "Let's demonstrate using `prebuilt-invoice` to extract structured data from an invoice PDF. This analyzer automatically identifies vendor information, invoice numbers, dates, line items, totals, taxes, and payment details without any configuration.\n" + "Let's demonstrate using `prebuilt-invoice` to extract structured data from an invoice PDF. This analyzer automatically identifies vendor information, invoice numbers, dates, line items, totals, taxes, and payment details without any configuration." ] }, { @@ -379,7 +382,7 @@ "source": [ "## 2. Receipt Field Extraction with Prebuilt Analyzer\n", "\n", - "Let's demonstrate using `prebuilt-receipt` to extract structured data from a receipt image. This analyzer automatically identifies merchant information, items, totals, taxes, and payment details without any configuration.\n" + "Let's demonstrate using `prebuilt-receipt` to extract structured data from a receipt image. This analyzer automatically identifies merchant information, items, totals, taxes, and payment details without any configuration." ] }, { @@ -410,7 +413,7 @@ "source": [ "**Receipt Analysis Results**\n", "\n", - "Let's examine the extracted fields from the receipt:\n" + "Let's review the extracted fields from the receipt:\n" ] }, { @@ -421,7 +424,7 @@ "source": [ "# Save the analysis result to a file\n", "saved_file_path = save_json_to_file(analysis_result, filename_prefix=\"prebuilt_receipt_analysis_result\")\n", - "# Print the full analysis result as a JSON string\n", + "# Print the full analysis result as a formatted JSON string\n", "print(json.dumps(analysis_result, indent=2))\n" ] }, @@ -431,7 +434,7 @@ "source": [ "# Custom Analyzers\n", "\n", - "Now let's explore creating custom analyzers to extract specific fields tailored to your needs. 
Custom analyzers allow you to define exactly what information you want to extract and how it should be structured.\n" + "Now let's explore creating custom analyzers to extract specific fields tailored to your needs. Custom analyzers allow you to define exactly what information you want to extract and how it should be structured." ] }, { @@ -440,29 +443,29 @@ "source": [ "**Key Analyzer Configuration Components:**\n", "\n", - "- **`baseAnalyzerId`**: Specifies which prebuilt analyzer to inherit from. Available base analyzers:\n", - " - **`prebuilt-document`** - For document-based custom analyzers (PDFs, images, Office docs)\n", - " - **`prebuilt-audio`** - For audio-based custom analyzers\n", - " - **`prebuilt-video`** - For video-based custom analyzers\n", - " - **`prebuilt-image`** - For image-based custom analyzers\n", + "- **`baseAnalyzerId`**: Specifies which prebuilt analyzer to inherit from. Available base analyzers include:\n", + " - **`prebuilt-document`** — For document-based custom analyzers (PDFs, images, Office docs)\n", + " - **`prebuilt-audio`** — For audio-based custom analyzers\n", + " - **`prebuilt-video`** — For video-based custom analyzers\n", + " - **`prebuilt-image`** — For image-based custom analyzers\n", "\n", "- **`fieldSchema`**: Defines the structured data to extract from content:\n", " - **`fields`**: Object defining each field to extract, with field names as keys\n", " - Each field definition includes:\n", " - **`type`**: Data type (`string`, `number`, `boolean`, `date`, `object`, `array`)\n", - " - **`description`**: Clear explanation of the field - acts as a prompt to guide extraction accuracy\n", + " - **`description`**: A clear explanation of the field acting as a prompt to guide extraction accuracy\n", " - **`method`**: Extraction method to use:\n", - " - **`\"extract\"`** - Extract values as they appear in content (literal text extraction). Requires `estimateSourceAndConfidence: true`. 
Only supported for document analyzers.\n", - " - **`\"generate\"`** - Generate values using AI based on content understanding (best for complex fields)\n", - " - **`\"classify\"`** - Classify values against predefined categories (use with `enum`)\n", + " - **`\"extract\"`** — Extract values as they appear in content (literal text extraction). Requires `estimateSourceAndConfidence: true`. Only supported for document analyzers.\n", + " - **`\"generate\"`** — Generate values using AI based on content understanding (best for complex fields)\n", + " - **`\"classify\"`** — Classify values against predefined categories (use with `enum`)\n", " - **`enum`**: (Optional) Fixed list of possible values for classification\n", - " - **`items`**: (For arrays) Defines structure of array elements\n", - " - **`properties`**: (For objects) Defines nested field structure\n", + " - **`items`**: (For arrays) Defines the structure of array elements\n", + " - **`properties`**: (For objects) Defines nested field structures\n", "\n", "- **`config`**: Processing options that control analysis behavior:\n", " - **`returnDetails`**: Include confidence scores, bounding boxes, metadata (default: false)\n", " - **`enableOcr`**: Extract text from images/scans (default: true, document only)\n", - " - **`enableLayout`**: Extract layout info like paragraphs, structure (default: true, document only)\n", + " - **`enableLayout`**: Extract layout info like paragraphs and structure (default: true, document only)\n", " - **`estimateFieldSourceAndConfidence`**: Return source locations and confidence for extracted fields (document only)\n", " - **`locales`**: Language codes for transcription (audio/video, e.g., `[\"en-US\"]`)\n", " - **`contentCategories`**: Define categories for classification and segmentation\n", @@ -472,7 +475,7 @@ " - **`completion`**: Model for extraction/generation tasks (e.g., `\"gpt-4o\"`, `\"gpt-4o-mini\"`)\n", " - **`embedding`**: Model for embedding tasks when using knowledge 
bases\n", "\n", - "For complete details, see the [Analyzer Reference Documentation](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference).\n" + "For full details, see the [Analyzer Reference Documentation](https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/concepts/analyzer-reference).\n" ] }, { @@ -481,7 +484,7 @@ "source": [ "# Document Analysis\n", "\n", - "Let's start with document analysis by extracting fields from invoices and receipts. This modality is excellent for processing structured documents and extracting key information like amounts, dates, vendor details, and line items." + "Let's start with document analysis by extracting fields from invoices and receipts. This modality is excellent for processing structured documents and extracting key information such as amounts, dates, vendor details, and line items." ] }, { @@ -490,7 +493,7 @@ "source": [ "## 1. Invoice Field Extraction\n", "\n", - "Let's extract fields from an invoice PDF. This analyzer identifies essential invoice elements such as vendor information, amounts, dates, and line items." + "We'll extract fields from an invoice PDF. This analyzer identifies essential invoice elements such as vendor information, amounts, dates, and line items." ] }, { @@ -499,7 +502,7 @@ "source": [ "**Create and Run Invoice Analyzer**\n", "\n", - "Now let's create the invoice analyzer and process our sample invoice:" + "Now let's create the custom invoice analyzer and process our sample invoice:" ] }, { @@ -569,7 +572,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's run the custom analyzer with a invoice pdf." + "Let's run the custom analyzer with an invoice PDF." 
] }, { @@ -655,8 +658,9 @@ " \n", " # Save full result to file\n", " timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", - " output_file = f\"test_output/invoice_analysis_result_{timestamp}.json\"\n", - " os.makedirs(\"test_output\", exist_ok=True)\n", + " output_folder = \"test_output\"\n", + " os.makedirs(output_folder, exist_ok=True)\n", + " output_file = os.path.join(output_folder, f\"invoice_analysis_result_{timestamp}.json\")\n", " \n", " with open(output_file, 'w') as f:\n", " json.dump(analysis_result, f, indent=2)\n", @@ -672,7 +676,7 @@ "source": [ "**Clean Up Invoice Analyzer**\n", "\n", - "Clean up the analyzer to manage resources (in production, you would typically keep analyzers for reuse):" + "Clean up the analyzer to manage resources. (In production, you would typically keep analyzers for reuse):" ] }, { @@ -695,14 +699,13 @@ "\n", "🎉 **Congratulations!** You've successfully completed the field extraction tutorial for Azure AI Content Understanding!\n", "\n", - "\n", "## Next Steps\n", "\n", - "- **Try Other Notebooks**: \n", - " - `content_extraction.ipynb` - Multi-modal content extraction (audio, video, images)\n", - " - `conversational_field_extraction.ipynb` - Extract fields from audio conversations\n", - " - `management.ipynb` - Advanced analyzer management operations\n", - "- **Read the Documentation**: Visit the [Azure AI Content Understanding documentation](https://learn.microsoft.com/azure/ai-services/content-understanding/) for comprehensive guides and API references" + "- **Try Other Notebooks:**\n", + " - `content_extraction.ipynb` — Multi-modal content extraction (audio, video, images)\n", + " - `conversational_field_extraction.ipynb` — Extract fields from audio conversations\n", + " - `management.ipynb` — Advanced analyzer management operations\n", + "- **Read the Documentation:** Visit the [Azure AI Content Understanding documentation](https://learn.microsoft.com/azure/ai-services/content-understanding/) for comprehensive 
guides and API references.\n" ] } ],