diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index 9a2456e6..68577e58 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "00c21026", + "id": "f80a5317", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "ece3d9a9", + "id": "f94e1097", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -26,7 +26,7 @@ }, { "cell_type": "markdown", - "id": "38d1b88f", + "id": "847f7d3b", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -37,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53321634", + "id": "ef72fc77", "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e8544d6", + "id": "cfa5d568", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4a9e48bc", + "id": "dc6a83bf", "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "markdown", - "id": "21b12719", + "id": "1d2f7999", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -89,7 +89,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7d689c22", + "id": "a073af81", "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "3db3eab3", + "id": "92c42c7d", "metadata": {}, "source": [ "### 🎛️ Define model configurations\n", @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f4447bbe", + "id": "45ae1dbf", "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "b5af9991", + "id": "35d0b3af", "metadata": {}, "source": [ "### 🏗️ Initialize the Data Designer Config Builder\n", @@ -160,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40bdb697", + "id": "0c957c0b", "metadata": {}, "outputs": [], "source": [ @@ -169,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "4dad8aa0", + "id": "132e10d6", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -186,7 +186,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8eecf6e8", + "id": "b62b30fd", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "markdown", - "id": "e4d6a23a", + "id": "04df17aa", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -204,7 +204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3ce7276", + "id": "cb62478e", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +285,7 @@ }, { "cell_type": "markdown", - "id": "a8aafd2c", + "id": "889beb4b", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3bdb3991", + "id": "e2746105", "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "markdown", - "id": "743bb645", + "id": "9fcf1a92", "metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da2b9677", + "id": "6e6ac591", "metadata": {}, "outputs": [], "source": [ @@ -382,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "febed040", + "id": "35332948", "metadata": {}, "source": [ "### 🔁 Iteration is 
key – preview the dataset!\n", @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "af574e1c", + "id": "e2830ad2", "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5cddea8", + "id": "911fecd7", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "523da02f", + "id": "46faf8e9", "metadata": {}, "outputs": [], "source": [ @@ -430,7 +430,7 @@ }, { "cell_type": "markdown", - "id": "b58b6a23", + "id": "3565f974", "metadata": {}, "source": [ "### 📊 Analyze the generated data\n", @@ -443,7 +443,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26b9a54a", + "id": "6effb2c0", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ }, { "cell_type": "markdown", - "id": "ae2f9efe", + "id": "5b63d3ec", "metadata": {}, "source": [ "### 🆙 Scale up!\n", @@ -466,7 +466,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d8341c24", + "id": "0214e011", "metadata": {}, "outputs": [], "source": [ @@ -476,7 +476,7 @@ { "cell_type": "code", "execution_count": null, - "id": "746166bb", + "id": "11560d0f", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c67992b", + "id": "246f210c", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +501,7 @@ }, { "cell_type": "markdown", - "id": "65da8b83", + "id": "f9f91c1d", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index 75e2d72d..f670feea 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3d5ec9c5", + "id": "5bac4b52", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "3813ccb2", + "id": "c0710263", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "86173a51", + "id": "f32b881a", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6ee5a0e0", + "id": "0a57621e", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87742e65", + "id": "c3331d72", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "450a862c", + "id": "7a3086f1", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "8f06cd05", + "id": "74b7ff91", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9a880c00", + "id": "c927b6fd", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "d862ae5c", + "id": "bbbbd8ac", "metadata": {}, "source": [ "### 🎛️ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "84e6f76a", + "id": "1ac7c15c", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "07b038aa", + "id": "78b73f85", "metadata": {}, "source": [ "### 🏗️ Initialize 
the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7e42df4", + "id": "86bbab0b", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "600127e0", + "id": "f2061354", "metadata": {}, "source": [ "### 🧑‍🎨 Designing our data\n", @@ -198,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ecebc077", + "id": "24b949c8", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "6f24c511", + "id": "13a68f51", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -235,7 +235,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6cd4a4a5", + "id": "47b1cceb", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "3fa250c7", + "id": "06485787", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "77895d82", + "id": "ca23ae49", "metadata": {}, "outputs": [], "source": [ @@ -414,7 +414,7 @@ }, { "cell_type": "markdown", - "id": "236f32c0", + "id": "4f2ffc03", "metadata": {}, "source": [ "### 🔁 Iteration is key – preview the dataset!\n", @@ -431,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "719d3d7f", + "id": "6ab1fc06", "metadata": {}, "outputs": [], "source": [ @@ -441,7 +441,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d25b2a23", + "id": "774bd1d1", "metadata": {}, "outputs": [], "source": [ @@ -452,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8cfff7c2", + "id": "58748c12", "metadata": {}, "outputs": [], "source": [ @@ -462,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "acfc4317", + "id": "3ad394fa", "metadata": {}, "source": [ "### 📊 Analyze the generated data\n", @@ -475,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02a90c0a", + "id": "cc1b82f9", "metadata": {}, "outputs": [], "source": [ @@ -485,7 +485,7 @@ }, { "cell_type": "markdown", - "id": "60bac583", + "id": "c925f5c6", "metadata": {}, "source": [ "### 🆙 Scale up!\n", @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fd92ca3c", + "id": "87ad537b", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +508,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca5eded6", + "id": "2deea3fe", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29f4b884", + "id": "2fc4d912", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ }, { "cell_type": "markdown", - "id": "18914be2", + "id": "08463ecf", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index 91c13986..0fada303 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "30b0205f", + "id": "ff432315", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "fd7184e7", + "id": "e0fb123d", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - 
"id": "f229a5f3", + "id": "3d7e201e", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3cfdeadf", + "id": "f03687a8", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ad3bee9", + "id": "be915b25", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7a8d675", + "id": "6fb58d20", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "e52b2806", + "id": "6f95f1d9", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21ad21d1", + "id": "77e88553", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "e313e1c7", + "id": "7dd437a5", "metadata": {}, "source": [ "### 🎛️ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5927e232", + "id": "ec9d1d7d", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "3fe284f0", + "id": "e38e3122", "metadata": {}, "source": [ "### 🏗️ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0475564b", + "id": "d65ab694", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "588837c2", + "id": "cbc9838f", "metadata": {}, "source": [ "## 🏥 Prepare a seed dataset\n", @@ -196,7 +196,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e8dfb164", + "id": "7adf53e5", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ }, { "cell_type": "markdown", - "id": "ca5f46ea", + "id": "03a770cc", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "830810e8", + "id": "93843a3f", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "cbb1e2ad", + "id": "248168ed", "metadata": {}, "source": [ "### 🔁 Iteration is key – preview the dataset!\n", @@ -325,7 +325,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9c39104", + "id": "303dea91", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5750e220", + "id": "17246444", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3573753", + "id": "c0a85570", "metadata": {}, "outputs": [], "source": [ @@ -356,7 +356,7 @@ }, { "cell_type": "markdown", - "id": "14937896", + "id": "4e04abd0", "metadata": {}, "source": [ "### 📊 Analyze the generated data\n", @@ -369,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cd3adb37", + "id": "05e6507f", "metadata": {}, "outputs": [], "source": [ @@ -379,7 +379,7 @@ }, { "cell_type": "markdown", - "id": "aa4fee79", + "id": "71ca31b0", "metadata": {}, "source": [ "### 🆙 Scale up!\n", @@ -392,7 +392,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29024ffc", + "id": "301bddf2", "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ { "cell_type": "code", "execution_count": null, - "id": "73da6149", + "id": "92076fc7", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ { "cell_type": "code", "execution_count": null, - 
"id": "bc2f927d", + "id": "ee8e8b31", "metadata": {}, "outputs": [], "source": [ @@ -427,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "29990c5d", + "id": "58e0ec67", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index cc10ec63..d8b1e851 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "911877e5", + "id": "187ae39b", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "c6756afd", + "id": "affdec51", "metadata": {}, "source": [ "#### 📚 What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "d73b25ce", + "id": "fb2ab96c", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "f05ece3e", + "id": "64661d08", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d84f4489", + "id": "52dd1f6e", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e4cc2d4", + "id": "73031da5", "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4e4e8d45", + "id": "2bf90705", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "0cdd2a8a", + "id": "2c2acaba", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4bb0ca16", + "id": "37c9aeac", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "bd17820d", + "id": "be82bbc3", "metadata": {}, "source": [ "### 🎛️ Define model configurations\n", @@ -139,7 +139,7 @@ { "cell_type": "code", "execution_count": null, - "id": "301f2bd2", + "id": "8a5f3290", "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ }, { "cell_type": "markdown", - "id": "ad04f82a", + "id": "21b9d39a", "metadata": {}, "source": [ "### 🏗️ Initialize the Data Designer Config Builder\n", @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac8e2885", + "id": "9a83d258", "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "7b8aafc0", + "id": "253e3ebd", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -203,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "432edd4a", + "id": "e907709a", "metadata": {}, "outputs": [], "source": [ @@ -218,7 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c4f94627", + "id": "1cdbec46", "metadata": {}, "outputs": [], "source": [ @@ -266,7 +266,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9b697311", + "id": "c398fa28", "metadata": {}, "outputs": [], "source": [ @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bcfc97e8", + "id": "9af5294d", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a3bdc13", + "id": "0b629ce2", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9665355", + 
"id": "e438dc74", "metadata": {}, "outputs": [], "source": [ @@ -320,13 +320,7 @@ " \"Start from the top of the image and then describe it from top to bottom. \"\n", " \"Place a summary at the bottom.\"\n", " ),\n", - " multi_modal_context=[\n", - " dd.ImageContext(\n", - " column_name=\"base64_image\",\n", - " data_type=dd.ModalityDataType.BASE64,\n", - " image_format=dd.ImageFormat.PNG,\n", - " )\n", - " ],\n", + " multi_modal_context=[dd.ImageContext(column_name=\"base64_image\")],\n", " )\n", ")\n", "\n", @@ -335,7 +329,7 @@ }, { "cell_type": "markdown", - "id": "6d900aaa", + "id": "870969be", "metadata": {}, "source": [ "### 🔁 Iteration is key – preview the dataset!\n", @@ -352,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51a80346", + "id": "218b773b", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +356,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea217964", + "id": "bf4a040c", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +367,7 @@ { "cell_type": "code", "execution_count": null, - "id": "be0e4ef0", + "id": "2d3e0129", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +377,7 @@ }, { "cell_type": "markdown", - "id": "0c75f531", + "id": "6c3d748f", "metadata": {}, "source": [ "### 📊 Analyze the generated data\n", @@ -396,7 +390,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bcbf86d1", + "id": "210d10b7", "metadata": {}, "outputs": [], "source": [ @@ -406,7 +400,7 @@ }, { "cell_type": "markdown", - "id": "0ab35029", + "id": "52a54996", "metadata": {}, "source": [ "### 🔎 Visual Inspection\n", @@ -417,7 +411,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03314ae9", + "id": "c8182b42", "metadata": { "lines_to_next_cell": 2 }, @@ -441,7 +435,7 @@ }, { "cell_type": "markdown", - "id": "e76a3e3b", + "id": "3252607d", "metadata": {}, "source": [ "### 🆙 Scale up!\n", @@ -454,7 +448,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d16566c0", + "id": "9e08d6af", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +458,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8e7796ba", + "id": "614e167b", "metadata": {}, "outputs": [], "source": [ @@ -477,7 +471,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14bc1042", + "id": "e8672292", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +483,7 @@ }, { "cell_type": "markdown", - "id": "1e676330", + "id": "252d95b4", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/5-generating-images.ipynb b/docs/colab_notebooks/5-generating-images.ipynb index ea9e0b8f..bc73ced9 100644 --- a/docs/colab_notebooks/5-generating-images.ipynb +++ b/docs/colab_notebooks/5-generating-images.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3b8abde3", + "id": "5b22efa0", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Generating Images\n", @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "1da8d75f", + "id": "f207429e", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "cc461005", + "id": "48efeb65", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "206037bf", + "id": "5ed20bca", "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ { "cell_type": "code", "execution_count": null, - "id": "db5a4929", + "id": "346da1f0", "metadata": {}, "outputs": [], "source": [ @@ -75,7 +75,7 @@ { "cell_type": "code", "execution_count": null, - "id": 
"b3cba8b6", + "id": "5bae7c57", "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "444aa9dc", + "id": "dbabda58", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", @@ -99,7 +99,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1932342c", + "id": "102258ce", "metadata": {}, "outputs": [], "source": [ @@ -108,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "aa7b90c5", + "id": "5ffa9e6b", "metadata": {}, "source": [ "### 🎛️ Define an image-generation model\n", @@ -120,7 +120,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df7e4385", + "id": "4df5a31f", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +142,7 @@ }, { "cell_type": "markdown", - "id": "a1325e38", + "id": "414f7eaa", "metadata": {}, "source": [ "### 🏗️ Build the config: samplers + image column\n", @@ -153,7 +153,7 @@ { "cell_type": "code", "execution_count": null, - "id": "95064ed0", + "id": "422c321f", "metadata": {}, "outputs": [], "source": [ @@ -326,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "c6fe0620", + "id": "e51179cc", "metadata": {}, "source": [ "### 🔁 Preview: images as base64\n", @@ -337,7 +337,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7323dce5", + "id": "0a41ddd8", "metadata": {}, "outputs": [], "source": [ @@ -347,7 +347,7 @@ { "cell_type": "code", "execution_count": null, - "id": "510b933c", + "id": "b349bf36", "metadata": {}, "outputs": [], "source": [ @@ -358,7 +358,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0c8c197f", + "id": "adf4b54c", "metadata": {}, "outputs": [], "source": [ @@ -367,7 +367,7 @@ }, { "cell_type": "markdown", - "id": "4cffd205", + "id": "0c74fadd", "metadata": {}, "source": [ "### 🆙 Create: images saved to disk\n", @@ -378,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "308bf2b8", + "id": "fc15a7cf", "metadata": {}, "outputs": [], "source": [ @@ -388,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02610965", + "id": "ab725b7c", "metadata": {}, "outputs": [], "source": [ @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "189af389", + "id": "7b70baf2", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ }, { "cell_type": "markdown", - "id": "51558182", + "id": "daf6ae58", "metadata": {}, "source": [ "## ⏭️ Next steps\n", diff --git a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb index ddfe9d37..32e1468b 100644 --- a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb +++ b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb @@ -2,21 +2,20 @@ "cells": [ { "cell_type": "markdown", - "id": "c7129daf", + "id": "40e16af1", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Image-to-Image Editing\n", "\n", "#### 📚 What you'll learn\n", "\n", - "This notebook shows how to edit existing images by combining a seed dataset with image generation. 
You'll load animal portrait photographs from HuggingFace, feed them as context to an autoregressive model, and generate fun edited versions with accessories like sunglasses, top hats, and bow ties.\n", + "This notebook shows how to chain image generation columns: first generate animal portraits from text, then edit those generated images by adding accessories and changing styles—all without loading external datasets.\n", "\n", - "- 🌱 **Seed datasets with images**: Load a HuggingFace image dataset and use it as a seed\n", - "- 🖼️ **Image context for editing**: Pass existing images to an image-generation model via `multi_modal_context`\n", - "- 🎲 **Sampler-driven diversity**: Combine sampled accessories and settings with seed images for varied results\n", - "- 💾 **Preview vs create**: Preview stores base64 in the dataframe; create saves images to disk\n", + "- 🖼️ **Text-to-image generation**: Generate images from text prompts\n", + "- 🔗 **Chaining image columns**: Use `ImageContext` to pass generated images to a follow-up editing column\n", + "- 🎲 **Sampler-driven diversity**: Combine sampled accessories and settings for varied edits\n", "\n", - "This tutorial uses an **autoregressive** model (one that supports both image input *and* image output via the chat completions API). Diffusion models (DALL·E, Stable Diffusion, etc.) do not support image context—see [Tutorial 5](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) for text-to-image generation with diffusion models.\n", + "This tutorial uses an **autoregressive** model (one that supports both text-to-image *and* image-to-image generation via the chat completions API). Diffusion models (DALL·E, Stable Diffusion, etc.) do not support image context—see [Tutorial 5](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) for text-to-image generation with diffusion models.\n", "\n", "> **Prerequisites**: This tutorial uses [OpenRouter](https://openrouter.ai) with the Flux 2 Pro model. Set `OPENROUTER_API_KEY` in your environment before running.\n", "\n", @@ -25,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "6a438ee3", + "id": "846d9aad", "metadata": {}, "source": [ "### 📦 Import Data Designer\n", @@ -36,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "1a022157", + "id": "738842c8", "metadata": {}, "source": [ "### ⚡ Colab Setup\n", @@ -47,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "752fe3eb", + "id": "c0f03293", "metadata": {}, "outputs": [], "source": [ @@ -58,7 +57,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49266cc2", + "id": "a65be55d", "metadata": {}, "outputs": [], "source": [ @@ -76,16 +75,13 @@ { "cell_type": "code", "execution_count": null, - "id": "d87dfa0b", + "id": "f8970dcf", "metadata": {}, "outputs": [], "source": [ "import base64\n", - "import io\n", - "import uuid\n", + "from pathlib import Path\n", "\n", - "import pandas as pd\n", - "from datasets import load_dataset\n", "from IPython.display import Image as IPImage\n", "from IPython.display import display\n", "\n", @@ -95,18 +91,18 @@ }, { "cell_type": "markdown", - "id": "c99ff426", + "id": "ad9c7aec", "metadata": {}, "source": [ "### ⚙️ Initialize the Data Designer interface\n", "\n", - "We initialize Data Designer without arguments here—the image-editing model is configured explicitly in the next cell. 
No default text model is needed for this tutorial.\n" + "We initialize Data Designer without arguments here—the image model is configured explicitly in the next cell.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "9be6231b", + "id": "ac6f7185", "metadata": {}, "outputs": [], "source": [ @@ -115,12 +111,12 @@ }, { "cell_type": "markdown", - "id": "3e242b51", + "id": "64c18e7a", "metadata": {}, "source": [ - "### 🎛️ Define an image-editing model\n", + "### 🎛️ Define an image model\n", "\n", - "We need an **autoregressive** model that supports both image input and image output via the chat completions API. This lets us pass existing images as context and receive edited images back.\n", + "We need an **autoregressive** model that supports both text-to-image and image-to-image generation via the chat completions API. This lets us generate images from text and then pass those images as context for editing.\n", "\n", "- Use `ImageInferenceParams` so Data Designer treats this model as an image generator.\n", "- Image-specific options are model-dependent; pass them via `extra_body`.\n", @@ -131,13 +127,13 @@ { "cell_type": "code", "execution_count": null, - "id": "34dd8eed", + "id": "52c88de0", "metadata": {}, "outputs": [], "source": [ "MODEL_PROVIDER = \"openrouter\"\n", "MODEL_ID = \"black-forest-labs/flux.2-pro\"\n", - "MODEL_ALIAS = \"image-editor\"\n", + "MODEL_ALIAS = \"image-model\"\n", "\n", "model_configs = [\n", " dd.ModelConfig(\n", @@ -153,96 +149,38 @@ }, { "cell_type": "markdown", - "id": "98abe1a9", - "metadata": {}, - "source": [ - "### 🌱 Load animal portraits from HuggingFace\n", - "\n", - "We'll load animal face photographs from the [AFHQ](https://huggingface.co/datasets/huggan/AFHQv2) (Animal Faces-HQ) dataset, convert them to base64, and use them as a seed dataset.\n", - "\n", - "AFHQ contains high-quality 512×512 close-up portraits of cats, dogs, and wildlife—perfect subjects for adding fun accessories.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "233f483b", - "metadata": {}, - "outputs": [], - "source": [ - "SEED_COUNT = 10\n", - "BASE64_IMAGE_HEIGHT = 512\n", - "\n", - "ANIMAL_LABELS = {0: \"cat\", 1: \"dog\", 2: \"wild\"}\n", - "\n", - "\n", - "def resize_image(image, height: int):\n", - " \"\"\"Resize image maintaining aspect ratio.\"\"\"\n", - " original_width, original_height = image.size\n", - " width = int(original_width * (height / original_height))\n", - " return image.resize((width, height))\n", - "\n", - "\n", - "def prepare_record(record: dict, height: int) -> dict:\n", - " \"\"\"Convert a HuggingFace record to base64 with metadata.\"\"\"\n", - " image = resize_image(record[\"image\"], height)\n", - " img_buffer = io.BytesIO()\n", - " image.save(img_buffer, format=\"PNG\")\n", - " base64_string = base64.b64encode(img_buffer.getvalue()).decode(\"utf-8\")\n", - " return {\n", - " \"uuid\": str(uuid.uuid4()),\n", - " \"base64_image\": base64_string,\n", - " \"animal\": ANIMAL_LABELS[record[\"label\"]],\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6b1a7b59", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"📥 Streaming animal portraits from HuggingFace...\")\n", - "hf_dataset = load_dataset(\"huggan/AFHQv2\", split=\"train\", streaming=True)\n", - "\n", - "hf_iter = iter(hf_dataset)\n", - "records = [prepare_record(next(hf_iter), BASE64_IMAGE_HEIGHT) for _ in range(SEED_COUNT)]\n", - "df_seed = pd.DataFrame(records)\n", - "\n", - "print(f\"✅ Prepared {len(df_seed)} animal 
portraits with columns: {list(df_seed.columns)}\")\n", - "df_seed.head()" - ] - }, - { - "cell_type": "markdown", - "id": "2956a5a6", + "id": "b4883011", "metadata": {}, "source": [ "### 🏗️ Build the configuration\n", "\n", - "We combine three ingredients:\n", + "We chain two image generation columns:\n", "\n", - "1. **Seed dataset** — original animal portraits as base64 and their species labels\n", - "2. **Sampler columns** — randomly sample accessories and settings for each image\n", - "3. **Image column with context** — generate an edited image using the original as reference\n", - "\n", - "The `multi_modal_context` parameter on `ImageColumnConfig` tells Data Designer to pass the seed image to the model alongside the text prompt. The model receives both the image and the editing instructions, and generates a new image.\n" + "1. **Sampler columns** — randomly sample animal types, accessories, settings, and art styles\n", + "2. **First image column** — generate an animal portrait from a text prompt\n", + "3. **Second image column with context** — edit the generated portrait using `ImageContext`\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "f79ffa72", + "id": "0b54bf99", "metadata": {}, "outputs": [], "source": [ "config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs)\n", "\n", - "# 1. Seed the original animal portraits\n", - "config_builder.with_seed_dataset(dd.DataFrameSeedSource(df=df_seed))\n", + "# 1. Sampler columns for diversity\n", + "config_builder.add_column(\n", + " dd.SamplerColumnConfig(\n", + " name=\"animal\",\n", + " sampler_type=dd.SamplerType.CATEGORY,\n", + " params=dd.CategorySamplerParams(\n", + " values=[\"cat\", \"dog\", \"fox\", \"owl\", \"rabbit\", \"panda\"],\n", + " ),\n", + " )\n", + ")\n", "\n", - "# 2. Add sampler columns for accessory diversity\n", "config_builder.add_column(\n", " dd.SamplerColumnConfig(\n", " name=\"accessory\",\n", @@ -294,10 +232,19 @@ " )\n", ")\n", "\n", - "# 3. Image column that reads the seed image as context and generates an edited version\n", + "# 2. Generate animal portrait from text\n", + "config_builder.add_column(\n", + " dd.ImageColumnConfig(\n", + " name=\"animal_portrait\",\n", + " prompt=\"A close-up portrait photograph of a {{ animal }} looking at the camera, studio lighting, high quality.\",\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "# 3. Edit the generated portrait\n", "config_builder.add_column(\n", " dd.ImageColumnConfig(\n", - " name=\"edited_image\",\n", + " name=\"edited_portrait\",\n", " prompt=(\n", " \"Edit this {{ animal }} portrait photo. \"\n", " \"Add {{ accessory }} on the animal. 
\"\n", @@ -306,13 +253,7 @@ " \"Keep the animal's face, expression, and features faithful to the original photo.\"\n", " ),\n", " model_alias=MODEL_ALIAS,\n", - " multi_modal_context=[\n", - " dd.ImageContext(\n", - " column_name=\"base64_image\",\n", - " data_type=dd.ModalityDataType.BASE64,\n", - " image_format=dd.ImageFormat.PNG,\n", - " )\n", - " ],\n", + " multi_modal_context=[dd.ImageContext(column_name=\"animal_portrait\")],\n", " )\n", ")\n", "\n", @@ -321,7 +262,7 @@ }, { "cell_type": "markdown", - "id": "0cba69c0", + "id": "85516cec", "metadata": {}, "source": [ "### 🔁 Preview: quick iteration\n", @@ -332,7 +273,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec669ae2", + "id": "4e922030", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +283,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41ac4a95", + "id": "534c5d07", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6f041d9d", + "id": "6ff24137", "metadata": {}, "outputs": [], "source": [ @@ -362,56 +303,51 @@ }, { "cell_type": "markdown", - "id": "483fa24a", + "id": "9c9ab4f9", "metadata": { "lines_to_next_cell": 2 }, "source": [ "### 🔎 Compare original vs edited\n", "\n", - "Let's display the original animal portraits next to their accessorized versions.\n" + "Let's display the generated animal portraits next to their edited versions.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "dd4d7dff", + "id": "90a28381", "metadata": {}, "outputs": [], "source": [ - "def display_before_after(row: pd.Series, index: int, base_path=None) -> None:\n", - " \"\"\"Display original vs edited image for a single record.\n", - "\n", - " When base_path is None (preview mode), edited_image is decoded from base64.\n", - " When base_path is provided (create mode), edited_image is loaded from disk.\n", - " \"\"\"\n", + "def display_image(image_value, base_path: Path | None = None) -> None:\n", + " \"\"\"Display an image from base64 (preview mode) or file path (create mode).\"\"\"\n", + " values = image_value if isinstance(image_value, list) else [image_value]\n", + " for value in values:\n", + " if base_path is not None:\n", + " display(IPImage(filename=str(base_path / value)))\n", + " else:\n", + " display(IPImage(data=base64.b64decode(value)))\n", + "\n", + "\n", + "def display_before_after(row, index: int, base_path: Path | None = None) -> None:\n", + " \"\"\"Display original portrait vs edited version for a single record.\"\"\"\n", " print(f\"\\n{'=' * 60}\")\n", " print(f\"Record {index}: {row['animal']} wearing {row['accessory']}\")\n", - " print(f\"Setting: {row['setting']}\")\n", - " print(f\"Style: {row['art_style']}\")\n", + " print(f\"Setting: {row['setting']}, Style: {row['art_style']}\")\n", " print(f\"{'=' * 60}\")\n", "\n", - " print(\"\\n📷 Original portrait:\")\n", - " display(IPImage(data=base64.b64decode(row[\"base64_image\"])))\n", + " print(\"\\n📷 Generated portrait:\")\n", + " display_image(row[\"animal_portrait\"], base_path)\n", "\n", " print(\"\\n🎨 Edited version:\")\n", - " edited = row.get(\"edited_image\")\n", - " if edited is None:\n", - " return\n", - " if base_path is None:\n", - " images = edited if isinstance(edited, list) else [edited]\n", - " for img_b64 in images:\n", - " display(IPImage(data=base64.b64decode(img_b64)))\n", - " else:\n", - " paths = edited if not isinstance(edited, str) else [edited]\n", - " for path in paths:\n", - " display(IPImage(filename=str(base_path / path)))" + " 
display_image(row[\"edited_portrait\"], base_path)" ] }, { "cell_type": "code", "execution_count": null, - "id": "af08dc6c", + "id": "7dd69086", "metadata": {}, "outputs": [], "source": [ @@ -421,18 +357,18 @@ }, { "cell_type": "markdown", - "id": "9ee15c83", + "id": "e5089b88", "metadata": {}, "source": [ "### 🆙 Create at scale\n", "\n", - "In **create** mode, images are saved to disk in an `images//` folder with UUID filenames. The dataframe stores relative paths.\n" + "In **create** mode, images are saved to disk in `images//` folders with UUID filenames. The dataframe stores relative paths. `ImageContext` auto-detection handles this transparently—generated file paths are resolved to base64 before being sent to the model for editing.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "9f0d27f8", + "id": "fd91d3de", "metadata": {}, "outputs": [], "source": [ @@ -442,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc17414a", + "id": "aecf8f86", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +389,7 @@ { "cell_type": "code", "execution_count": null, - "id": "849c03b6", + "id": "895767fd", "metadata": {}, "outputs": [], "source": [ @@ -463,14 +399,15 @@ }, { "cell_type": "markdown", - "id": "b7385f02", + "id": "ebe53fc8", "metadata": {}, "source": [ "## ⏭️ Next steps\n", "\n", - "- Experiment with different autoregressive models for image editing\n", + "- Experiment with different autoregressive models for image generation and editing\n", "- Try more creative editing prompts (style transfer, background replacement, artistic filters)\n", - "- Combine image editing with text generation (e.g., generate captions for edited images using an LLM-Text column)\n", + "- Combine image generation with text generation (e.g., generate captions using an LLM-Text column with `ImageContext`)\n", + "- Chain more than two image columns for multi-step editing pipelines\n", "\n", "Related tutorials:\n", "\n", diff --git a/docs/notebook_source/4-providing-images-as-context.py b/docs/notebook_source/4-providing-images-as-context.py index 1fd68dac..43c9efcd 100644 --- a/docs/notebook_source/4-providing-images-as-context.py +++ b/docs/notebook_source/4-providing-images-as-context.py @@ -196,13 +196,7 @@ def convert_image_to_chat_format(record, height: int) -> dict: "Start from the top of the image and then describe it from top to bottom. " "Place a summary at the bottom." ), - multi_modal_context=[ - dd.ImageContext( - column_name="base64_image", - data_type=dd.ModalityDataType.BASE64, - image_format=dd.ImageFormat.PNG, - ) - ], + multi_modal_context=[dd.ImageContext(column_name="base64_image")], ) ) diff --git a/docs/notebook_source/6-editing-images-with-image-context.py b/docs/notebook_source/6-editing-images-with-image-context.py index c419ad23..69040589 100644 --- a/docs/notebook_source/6-editing-images-with-image-context.py +++ b/docs/notebook_source/6-editing-images-with-image-context.py @@ -17,14 +17,13 @@ # # #### 📚 What you'll learn # -# This notebook shows how to edit existing images by combining a seed dataset with image generation. You'll load animal portrait photographs from HuggingFace, feed them as context to an autoregressive model, and generate fun edited versions with accessories like sunglasses, top hats, and bow ties. +# This notebook shows how to chain image generation columns: first generate animal portraits from text, then edit those generated images by adding accessories and changing styles—all without loading external datasets. 
# -# - 🌱 **Seed datasets with images**: Load a HuggingFace image dataset and use it as a seed -# - 🖼️ **Image context for editing**: Pass existing images to an image-generation model via `multi_modal_context` -# - 🎲 **Sampler-driven diversity**: Combine sampled accessories and settings with seed images for varied results -# - 💾 **Preview vs create**: Preview stores base64 in the dataframe; create saves images to disk +# - 🖼️ **Text-to-image generation**: Generate images from text prompts +# - 🔗 **Chaining image columns**: Use `ImageContext` to pass generated images to a follow-up editing column +# - 🎲 **Sampler-driven diversity**: Combine sampled accessories and settings for varied edits # -# This tutorial uses an **autoregressive** model (one that supports both image input *and* image output via the chat completions API). Diffusion models (DALL·E, Stable Diffusion, etc.) do not support image context—see [Tutorial 5](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) for text-to-image generation with diffusion models. +# This tutorial uses an **autoregressive** model (one that supports both text-to-image *and* image-to-image generation via the chat completions API). Diffusion models (DALL·E, Stable Diffusion, etc.) do not support image context—see [Tutorial 5](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/5-generating-images/) for text-to-image generation with diffusion models. # # > **Prerequisites**: This tutorial uses [OpenRouter](https://openrouter.ai) with the Flux 2 Pro model. Set `OPENROUTER_API_KEY` in your environment before running. # @@ -40,11 +39,8 @@ # %% import base64 -import io -import uuid +from pathlib import Path -import pandas as pd -from datasets import load_dataset from IPython.display import Image as IPImage from IPython.display import display @@ -54,16 +50,16 @@ # %% [markdown] # ### ⚙️ Initialize the Data Designer interface # -# We initialize Data Designer without arguments here—the image-editing model is configured explicitly in the next cell. No default text model is needed for this tutorial. +# We initialize Data Designer without arguments here—the image model is configured explicitly in the next cell. # # %% data_designer = DataDesigner() # %% [markdown] -# ### 🎛️ Define an image-editing model +# ### 🎛️ Define an image model # -# We need an **autoregressive** model that supports both image input and image output via the chat completions API. This lets us pass existing images as context and receive edited images back. +# We need an **autoregressive** model that supports both text-to-image and image-to-image generation via the chat completions API. This lets us generate images from text and then pass those images as context for editing. # # - Use `ImageInferenceParams` so Data Designer treats this model as an image generator. # - Image-specific options are model-dependent; pass them via `extra_body`. @@ -74,7 +70,7 @@ # %% MODEL_PROVIDER = "openrouter" MODEL_ID = "black-forest-labs/flux.2-pro" -MODEL_ALIAS = "image-editor" +MODEL_ALIAS = "image-model" model_configs = [ dd.ModelConfig( @@ -87,71 +83,30 @@ ) ] -# %% [markdown] -# ### 🌱 Load animal portraits from HuggingFace -# -# We'll load animal face photographs from the [AFHQ](https://huggingface.co/datasets/huggan/AFHQv2) (Animal Faces-HQ) dataset, convert them to base64, and use them as a seed dataset. -# -# AFHQ contains high-quality 512×512 close-up portraits of cats, dogs, and wildlife—perfect subjects for adding fun accessories. 
-# - -# %% -SEED_COUNT = 10 -BASE64_IMAGE_HEIGHT = 512 - -ANIMAL_LABELS = {0: "cat", 1: "dog", 2: "wild"} - - -def resize_image(image, height: int): - """Resize image maintaining aspect ratio.""" - original_width, original_height = image.size - width = int(original_width * (height / original_height)) - return image.resize((width, height)) - - -def prepare_record(record: dict, height: int) -> dict: - """Convert a HuggingFace record to base64 with metadata.""" - image = resize_image(record["image"], height) - img_buffer = io.BytesIO() - image.save(img_buffer, format="PNG") - base64_string = base64.b64encode(img_buffer.getvalue()).decode("utf-8") - return { - "uuid": str(uuid.uuid4()), - "base64_image": base64_string, - "animal": ANIMAL_LABELS[record["label"]], - } - - -# %% -print("📥 Streaming animal portraits from HuggingFace...") -hf_dataset = load_dataset("huggan/AFHQv2", split="train", streaming=True) - -hf_iter = iter(hf_dataset) -records = [prepare_record(next(hf_iter), BASE64_IMAGE_HEIGHT) for _ in range(SEED_COUNT)] -df_seed = pd.DataFrame(records) - -print(f"✅ Prepared {len(df_seed)} animal portraits with columns: {list(df_seed.columns)}") -df_seed.head() - # %% [markdown] # ### 🏗️ Build the configuration # -# We combine three ingredients: -# -# 1. **Seed dataset** — original animal portraits as base64 and their species labels -# 2. **Sampler columns** — randomly sample accessories and settings for each image -# 3. **Image column with context** — generate an edited image using the original as reference +# We chain two image generation columns: # -# The `multi_modal_context` parameter on `ImageColumnConfig` tells Data Designer to pass the seed image to the model alongside the text prompt. The model receives both the image and the editing instructions, and generates a new image. +# 1. **Sampler columns** — randomly sample animal types, accessories, settings, and art styles +# 2. **First image column** — generate an animal portrait from a text prompt +# 3. **Second image column with context** — edit the generated portrait using `ImageContext` # # %% config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) -# 1. Seed the original animal portraits -config_builder.with_seed_dataset(dd.DataFrameSeedSource(df=df_seed)) +# 1. Sampler columns for diversity +config_builder.add_column( + dd.SamplerColumnConfig( + name="animal", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams( + values=["cat", "dog", "fox", "owl", "rabbit", "panda"], + ), + ) +) -# 2. Add sampler columns for accessory diversity config_builder.add_column( dd.SamplerColumnConfig( name="accessory", @@ -203,10 +158,19 @@ def prepare_record(record: dict, height: int) -> dict: ) ) -# 3. Image column that reads the seed image as context and generates an edited version +# 2. Generate animal portrait from text +config_builder.add_column( + dd.ImageColumnConfig( + name="animal_portrait", + prompt="A close-up portrait photograph of a {{ animal }} looking at the camera, studio lighting, high quality.", + model_alias=MODEL_ALIAS, + ) +) + +# 3. Edit the generated portrait config_builder.add_column( dd.ImageColumnConfig( - name="edited_image", + name="edited_portrait", prompt=( "Edit this {{ animal }} portrait photo. " "Add {{ accessory }} on the animal. " @@ -215,13 +179,7 @@ def prepare_record(record: dict, height: int) -> dict: "Keep the animal's face, expression, and features faithful to the original photo." 
), model_alias=MODEL_ALIAS, - multi_modal_context=[ - dd.ImageContext( - column_name="base64_image", - data_type=dd.ModalityDataType.BASE64, - image_format=dd.ImageFormat.PNG, - ) - ], + multi_modal_context=[dd.ImageContext(column_name="animal_portrait")], ) ) @@ -246,38 +204,33 @@ def prepare_record(record: dict, height: int) -> dict: # %% [markdown] # ### 🔎 Compare original vs edited # -# Let's display the original animal portraits next to their accessorized versions. +# Let's display the generated animal portraits next to their edited versions. # # %% -def display_before_after(row: pd.Series, index: int, base_path=None) -> None: - """Display original vs edited image for a single record. - - When base_path is None (preview mode), edited_image is decoded from base64. - When base_path is provided (create mode), edited_image is loaded from disk. - """ +def display_image(image_value, base_path: Path | None = None) -> None: + """Display an image from base64 (preview mode) or file path (create mode).""" + values = image_value if isinstance(image_value, list) else [image_value] + for value in values: + if base_path is not None: + display(IPImage(filename=str(base_path / value))) + else: + display(IPImage(data=base64.b64decode(value))) + + +def display_before_after(row, index: int, base_path: Path | None = None) -> None: + """Display original portrait vs edited version for a single record.""" print(f"\n{'=' * 60}") print(f"Record {index}: {row['animal']} wearing {row['accessory']}") - print(f"Setting: {row['setting']}") - print(f"Style: {row['art_style']}") + print(f"Setting: {row['setting']}, Style: {row['art_style']}") print(f"{'=' * 60}") - print("\n📷 Original portrait:") - display(IPImage(data=base64.b64decode(row["base64_image"]))) + print("\n📷 Generated portrait:") + display_image(row["animal_portrait"], base_path) print("\n🎨 Edited version:") - edited = row.get("edited_image") - if edited is None: - return - if base_path is None: - images = edited if isinstance(edited, list) else [edited] - for img_b64 in images: - display(IPImage(data=base64.b64decode(img_b64))) - else: - paths = edited if not isinstance(edited, str) else [edited] - for path in paths: - display(IPImage(filename=str(base_path / path))) + display_image(row["edited_portrait"], base_path) # %% @@ -287,7 +240,7 @@ def display_before_after(row: pd.Series, index: int, base_path=None) -> None: # %% [markdown] # ### 🆙 Create at scale # -# In **create** mode, images are saved to disk in an `images//` folder with UUID filenames. The dataframe stores relative paths. +# In **create** mode, images are saved to disk in `images//` folders with UUID filenames. The dataframe stores relative paths. `ImageContext` auto-detection handles this transparently—generated file paths are resolved to base64 before being sent to the model for editing. 
# # %% @@ -304,9 +257,10 @@ def display_before_after(row: pd.Series, index: int, base_path=None) -> None: # %% [markdown] # ## ⏭️ Next steps # -# - Experiment with different autoregressive models for image editing +# - Experiment with different autoregressive models for image generation and editing # - Try more creative editing prompts (style transfer, background replacement, artistic filters) -# - Combine image editing with text generation (e.g., generate captions for edited images using an LLM-Text column) +# - Combine image generation with text generation (e.g., generate captions using an LLM-Text column with `ImageContext`) +# - Chain more than two image columns for multi-step editing pipelines # # Related tutorials: # diff --git a/docs/notebook_source/_README.md b/docs/notebook_source/_README.md index bbd29f9e..879e198f 100644 --- a/docs/notebook_source/_README.md +++ b/docs/notebook_source/_README.md @@ -108,12 +108,12 @@ Generate synthetic image data with Data Designer: ### [6. Image-to-Image Editing](6-editing-images-with-image-context.ipynb) -Edit existing images by combining seed datasets with image generation: +Chain image generation columns to generate and then edit images: -- Loading a HuggingFace image dataset and using it as a seed -- Passing existing images to an image-generation model via `multi_modal_context` -- Combining sampled accessories and settings with seed images for varied results -- Comparing original vs edited images in preview and create modes +- Generating images from text and then editing them in a follow-up column +- Using `ImageContext` with auto-detection to pass generated images to an editing model +- Combining sampled accessories and settings for varied edits +- Comparing generated vs edited images in preview and create modes ## 📖 Important Documentation Sections diff --git a/packages/data-designer-config/src/data_designer/config/__init__.py b/packages/data-designer-config/src/data_designer/config/__init__.py index 34cc4f44..a8b4fb35 100644 --- a/packages/data-designer-config/src/data_designer/config/__init__.py +++ b/packages/data-designer-config/src/data_designer/config/__init__.py @@ -41,7 +41,6 @@ EmbeddingInferenceParams, GenerationType, ImageContext, - ImageFormat, ImageInferenceParams, ManualDistribution, ManualDistributionParams, @@ -94,6 +93,7 @@ ) from data_designer.config.seed_source_dataframe import DataFrameSeedSource # noqa: F401 from data_designer.config.utils.code_lang import CodeLang # noqa: F401 + from data_designer.config.utils.image_helpers import ImageFormat # noqa: F401 from data_designer.config.utils.info import InfoType # noqa: F401 from data_designer.config.utils.trace_type import TraceType # noqa: F401 from data_designer.config.validator_params import ( # noqa: F401 @@ -152,7 +152,7 @@ "EmbeddingInferenceParams": (_MOD_MODELS, "EmbeddingInferenceParams"), "GenerationType": (_MOD_MODELS, "GenerationType"), "ImageContext": (_MOD_MODELS, "ImageContext"), - "ImageFormat": (_MOD_MODELS, "ImageFormat"), + "ImageFormat": (f"{_MOD_UTILS}.image_helpers", "ImageFormat"), "ImageInferenceParams": (_MOD_MODELS, "ImageInferenceParams"), "ManualDistribution": (_MOD_MODELS, "ManualDistribution"), "ManualDistributionParams": (_MOD_MODELS, "ManualDistributionParams"), diff --git a/packages/data-designer-config/src/data_designer/config/models.py b/packages/data-designer-config/src/data_designer/config/models.py index e13ef49c..578b34ee 100644 --- a/packages/data-designer-config/src/data_designer/config/models.py +++ 
b/packages/data-designer-config/src/data_designer/config/models.py @@ -22,6 +22,14 @@ MIN_TEMPERATURE, MIN_TOP_P, ) +from data_designer.config.utils.image_helpers import ( + ImageFormat, + decode_base64_image, + detect_image_format, + is_image_path, + is_image_url, + load_image_path_to_base64, +) from data_designer.config.utils.io_helpers import smart_load_yaml logger = logging.getLogger(__name__) @@ -40,16 +48,6 @@ class ModalityDataType(str, Enum): BASE64 = "base64" -class ImageFormat(str, Enum): - """Supported image formats for image modality.""" - - PNG = "png" - JPG = "jpg" - JPEG = "jpeg" - GIF = "gif" - WEBP = "webp" - - class DistributionType(str, Enum): """Types of distributions for sampling inference parameters.""" @@ -60,10 +58,10 @@ class DistributionType(str, Enum): class ModalityContext(ABC, BaseModel): modality: Modality column_name: str - data_type: ModalityDataType + data_type: ModalityDataType | None = None @abstractmethod - def get_contexts(self, record: dict) -> list[dict[str, Any]]: ... + def get_contexts(self, record: dict, *, base_path: str | None = None) -> list[dict[str, Any]]: ... class ImageContext(ModalityContext): @@ -72,14 +70,16 @@ class ImageContext(ModalityContext): Attributes: modality: The modality type (always "image"). column_name: Name of the column containing image data. - data_type: Format of the image data ("url" or "base64"). - image_format: Image format (required for base64 data). + data_type: Format of the image data ("url", "base64", or None for auto-detection). + When None, the format is auto-detected: URLs are passed through, file paths that + exist under base_path are loaded as base64, and other values are assumed to be base64. + image_format: Image format (required when data_type is explicitly "base64"). """ modality: Modality = Modality.IMAGE image_format: ImageFormat | None = None - def get_contexts(self, record: dict) -> list[dict[str, Any]]: + def get_contexts(self, record: dict, *, base_path: str | None = None) -> list[dict[str, Any]]: """Get the contexts for the image modality. Args: @@ -87,6 +87,10 @@ def get_contexts(self, record: dict) -> list[dict[str, Any]]: - A JSON serialized list of strings - A list of strings - A single string + base_path: Optional base path for resolving relative file paths. + When provided, file paths that exist under this directory are loaded + and converted to base64. This enables generated images (stored as relative + paths in create mode) to be sent to remote model endpoints. Returns: A list of image contexts. 
@@ -116,17 +120,54 @@ def get_contexts(self, record: dict) -> list[dict[str, Any]]: contexts = [] for context_value in context_values: context = dict(type="image_url") - if self.data_type == ModalityDataType.URL: - context["image_url"] = context_value + if self.data_type is not None: + # Explicit data_type: use existing behavior + if self.data_type == ModalityDataType.URL: + context["image_url"] = context_value + else: + context["image_url"] = { + "url": f"data:image/{self.image_format.value};base64,{context_value}", + "format": self.image_format.value, + } else: - context["image_url"] = { - "url": f"data:image/{self.image_format.value};base64,{context_value}", - "format": self.image_format.value, - } + # Auto-detect: resolve file paths, pass through URLs, assume base64 otherwise + context["image_url"] = self._auto_resolve_context_value(context_value, base_path) contexts.append(context) return contexts + def _auto_resolve_context_value(self, context_value: str, base_path: str | None) -> str | dict[str, str]: + """Auto-detect the format of a context value and resolve it. + + Resolution rules: + - File path that exists under base_path → load to base64 (generated artifact) + - URL (http/https) → pass through as-is + - Otherwise → assume base64 data + """ + if base_path is not None and is_image_path(context_value): + base64_data = load_image_path_to_base64(context_value, base_path=base_path) + if base64_data is not None: + return self._format_base64_context(base64_data) + + if is_image_url(context_value): + return context_value + + return self._format_base64_context(context_value) + + def _format_base64_context(self, base64_data: str) -> dict[str, str]: + """Format base64 image data as an image_url context dict. + + Uses self.image_format if set, otherwise detects from the image bytes. + """ + image_format = self.image_format + if image_format is None: + image_bytes = decode_base64_image(base64_data) + image_format = detect_image_format(image_bytes) + return { + "url": f"data:image/{image_format.value};base64,{base64_data}", + "format": image_format.value, + } + @model_validator(mode="after") def _validate_image_format(self) -> Self: if self.data_type == ModalityDataType.BASE64 and self.image_format is None: diff --git a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py index 908d3685..1f0e5c91 100644 --- a/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py +++ b/packages/data-designer-config/src/data_designer/config/utils/image_helpers.py @@ -13,7 +13,18 @@ import requests import data_designer.lazy_heavy_imports as lazy -from data_designer.config.models import ImageFormat +from data_designer.config.utils.type_helpers import StrEnum + + +class ImageFormat(StrEnum): + """Supported image formats for image modality.""" + + PNG = "png" + JPG = "jpg" + JPEG = "jpeg" + GIF = "gif" + WEBP = "webp" + # Magic bytes for image format detection IMAGE_FORMAT_MAGIC_BYTES = { diff --git a/packages/data-designer-config/tests/config/test_models.py b/packages/data-designer-config/tests/config/test_models.py index 0e4382d9..ec6beb72 100644 --- a/packages/data-designer-config/tests/config/test_models.py +++ b/packages/data-designer-config/tests/config/test_models.py @@ -1,9 +1,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import base64 import json import tempfile from collections import Counter +from pathlib import Path import pytest import yaml @@ -182,6 +184,67 @@ def test_image_context_validate_image_format(): ImageContext(column_name="image_base64", data_type=ModalityDataType.BASE64) +def test_image_context_no_data_type_passes_validation() -> None: + """Test that ImageContext without data_type passes validation.""" + context = ImageContext(column_name="image_col") + assert context.data_type is None + assert context.image_format is None + + +def test_image_context_auto_detect_url() -> None: + """Test auto-detection with URL value (no data_type).""" + context = ImageContext(column_name="image_col") + result = context.get_contexts({"image_col": "https://example.com/image.png"}) + assert result == [{"type": "image_url", "image_url": "https://example.com/image.png"}] + + +def test_image_context_auto_detect_base64(minimal_png_base64: str) -> None: + """Test auto-detection with base64 value (no data_type) — auto-detects PNG format from bytes.""" + png_base64 = minimal_png_base64 + context = ImageContext(column_name="image_col") + result = context.get_contexts({"image_col": png_base64}) + assert len(result) == 1 + assert result[0]["type"] == "image_url" + assert result[0]["image_url"]["format"] == "png" + assert f"base64,{png_base64}" in result[0]["image_url"]["url"] + + +def test_image_context_auto_detect_file_path_resolved(tmp_path: Path) -> None: + """Test auto-detection with file path that exists under base_path — loaded as base64.""" + images_dir = tmp_path / "images" / "col" + images_dir.mkdir(parents=True) + image_file = images_dir / "test.png" + png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 50 + image_file.write_bytes(png_bytes) + + context = ImageContext(column_name="image_col") + result = context.get_contexts( + {"image_col": "images/col/test.png"}, + base_path=str(tmp_path), + ) + assert len(result) == 1 + assert result[0]["type"] == "image_url" + expected_base64 = base64.b64encode(png_bytes).decode() + assert f"base64,{expected_base64}" in result[0]["image_url"]["url"] + + +def test_image_context_auto_detect_file_path_not_resolved_without_base_path() -> None: + """Test auto-detection with file path when no base_path — falls through to base64 decode error.""" + context = ImageContext(column_name="image_col") + with pytest.raises(ValueError, match="Invalid base64 data"): + context.get_contexts({"image_col": "images/col/test.png"}) + + +def test_image_context_auto_detect_file_path_not_exists(tmp_path: Path) -> None: + """Test auto-detection with non-existent file path — falls through to base64 decode error.""" + context = ImageContext(column_name="image_col") + with pytest.raises(ValueError, match="Invalid base64 data"): + context.get_contexts( + {"image_col": "images/col/nonexistent.png"}, + base_path=str(tmp_path), + ) + + def test_inference_parameters_default_construction(): empty_inference_parameters = ChatCompletionInferenceParams() assert empty_inference_parameters.generate_kwargs == {} diff --git a/packages/data-designer-config/tests/config/utils/test_image_helpers.py b/packages/data-designer-config/tests/config/utils/test_image_helpers.py index 55314b40..e425582a 100644 --- a/packages/data-designer-config/tests/config/utils/test_image_helpers.py +++ b/packages/data-designer-config/tests/config/utils/test_image_helpers.py @@ -4,7 +4,6 @@ from __future__ import annotations import base64 -import io from pathlib import Path from unittest.mock import Mock, patch 
@@ -24,16 +23,6 @@ validate_image, ) - -@pytest.fixture -def sample_png_bytes() -> bytes: - """Create a valid 1x1 PNG as raw bytes.""" - img = lazy.Image.new("RGB", (1, 1), color="red") - buf = io.BytesIO() - img.save(buf, format="PNG") - return buf.getvalue() - - # --------------------------------------------------------------------------- # extract_base64_from_data_uri # --------------------------------------------------------------------------- diff --git a/packages/data-designer-config/tests/conftest.py b/packages/data-designer-config/tests/conftest.py index d1c09dd6..15936af7 100644 --- a/packages/data-designer-config/tests/conftest.py +++ b/packages/data-designer-config/tests/conftest.py @@ -1,4 +1,28 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import base64 +import io + +import pytest + +import data_designer.lazy_heavy_imports as lazy + pytest_plugins = ["data_designer.config.testing.fixtures"] + + +@pytest.fixture +def sample_png_bytes() -> bytes: + """Create a valid 1x1 PNG as raw bytes.""" + img = lazy.Image.new("RGB", (1, 1), color="red") + buf = io.BytesIO() + img.save(buf, format="PNG") + return buf.getvalue() + + +@pytest.fixture +def minimal_png_base64(sample_png_bytes: bytes) -> str: + """Return a valid 1x1 PNG as a base64-encoded string.""" + return base64.b64encode(sample_png_bytes).decode() diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/base.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/base.py index 59dab425..932c7fae 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/base.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/base.py @@ -7,7 +7,7 @@ import functools import logging from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING, Any, overload from data_designer.config.column_configs import GenerationStrategy from data_designer.engine.configurable_task import ConfigurableTask, DataT, TaskConfigT @@ -98,6 +98,27 @@ def model_config(self) -> ModelConfig: def inference_parameters(self) -> BaseInferenceParams: return self.model_config.inference_parameters + def _build_multi_modal_context(self, record: dict) -> list[dict[str, Any]] | None: + """Build multi-modal context from the config's multi_modal_context list. + + Passes base_path to get_contexts() so that generated image file paths + (stored under base_dataset_path in create mode) can be resolved to base64 + before being sent to the model endpoint. + + Args: + record: The deserialized record containing column values. + + Returns: + A list of multi-modal context dicts, or None if no context is configured. 
+ """ + if not hasattr(self.config, "multi_modal_context") or not self.config.multi_modal_context: + return None + base_path = str(self.base_dataset_path) + multi_modal_context: list[dict[str, Any]] = [] + for context in self.config.multi_modal_context: + multi_modal_context.extend(context.get_contexts(record, base_path=base_path)) + return multi_modal_context + def log_pre_generation(self) -> None: logger.info( f"{self.config.get_column_emoji()} {self.config.column_type} model config for column '{self.config.name}'" diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py index 730e73bb..31095c49 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/image.py @@ -60,11 +60,7 @@ def generate(self, data: dict) -> dict: raise ValueError(f"Rendered prompt for column {self.config.name!r} is empty") # Process multi-modal context if provided - multi_modal_context = None - if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0: - multi_modal_context = [] - for context in self.config.multi_modal_context: - multi_modal_context.extend(context.get_contexts(deserialized_record)) + multi_modal_context = self._build_multi_modal_context(deserialized_record) # Generate images (returns list of base64 strings) base64_images = self.model.generate_image(prompt=prompt, multi_modal_context=multi_modal_context) diff --git a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/llm_completion.py b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/llm_completion.py index a0ff447a..f6625f09 100644 --- a/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/llm_completion.py +++ b/packages/data-designer-engine/src/data_designer/engine/column_generators/generators/llm_completion.py @@ -79,11 +79,7 @@ def _prepare_generation_kwargs(self, data: dict) -> dict[str, Any]: # Note: This creates a new dict and doesn't mutate the original `data` argument deserialized_record = deserialize_json_values(data) - multi_modal_context: list[dict[str, Any]] | None = None - if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0: - multi_modal_context = [] - for context in self.config.multi_modal_context: - multi_modal_context.extend(context.get_contexts(deserialized_record)) + multi_modal_context = self._build_multi_modal_context(deserialized_record) return { "prompt": self.prompt_renderer.render( diff --git a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py index ca5cbfae..fe42bb43 100644 --- a/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py +++ b/packages/data-designer-engine/tests/engine/column_generators/generators/test_image.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import base64 from unittest.mock import Mock, patch import pytest @@ -216,3 +217,89 @@ def test_image_cell_generator_with_base64_multi_modal_context(stub_resource_prov assert call_args.kwargs["multi_modal_context"][0]["type"] == "image_url" # Should be formatted as data URI assert "data:image/png;base64," in call_args.kwargs["multi_modal_context"][0]["image_url"]["url"] + + +def test_image_cell_generator_build_multi_modal_context_returns_none_when_not_configured( + stub_image_column_config: ImageColumnConfig, stub_resource_provider: None +) -> None: + """Test that _build_multi_modal_context returns None when config has no multi_modal_context.""" + generator = ImageCellGenerator(config=stub_image_column_config, resource_provider=stub_resource_provider) + result = generator._build_multi_modal_context({"style": "photorealistic", "subject": "cat"}) + assert result is None + + +def test_image_cell_generator_auto_resolves_generated_image_file_path(stub_resource_provider: Mock) -> None: + """Test that auto-detection resolves generated image file paths to base64 in create mode.""" + # Create ImageContext with no data_type (auto-detect mode) + image_context = ImageContext(column_name="first_image") + + config = ImageColumnConfig( + name="edited_image", + prompt="Edit this image", + model_alias="test_model", + multi_modal_context=[image_context], + ) + + # Create an actual image file under the artifact storage base_dataset_path + base_path = stub_resource_provider.artifact_storage.base_dataset_path + images_dir = base_path / "images" / "first_image" + images_dir.mkdir(parents=True) + image_file = images_dir / "uuid1.png" + png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 50 + image_file.write_bytes(png_bytes) + + # Setup mock media storage + mock_storage = Mock() + mock_storage.save_base64_image.return_value = "images/edited_image/uuid2.png" + stub_resource_provider.artifact_storage.media_storage = mock_storage + + with patch.object( + stub_resource_provider.model_registry.get_model.return_value, + "generate_image", + return_value=["base64_edited_image"], + ) as mock_generate: + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + # Simulate create mode: first_image column has a relative file path + data = generator.generate(data={"first_image": "images/first_image/uuid1.png"}) + + assert data["edited_image"] == ["images/edited_image/uuid2.png"] + + # Verify the multi_modal_context was resolved from file path to base64 + mock_generate.assert_called_once() + call_args = mock_generate.call_args + context = call_args.kwargs["multi_modal_context"] + assert context is not None + assert len(context) == 1 + assert context[0]["type"] == "image_url" + # Should contain base64 data, NOT the file path + expected_b64 = base64.b64encode(png_bytes).decode() + assert expected_b64 in context[0]["image_url"]["url"] + + +def test_image_cell_generator_auto_detect_passes_through_urls(stub_resource_provider: Mock) -> None: + """Test that auto-detection passes through URLs without converting to base64.""" + image_context = ImageContext(column_name="reference_image") + + config = ImageColumnConfig( + name="test_image", + prompt="Generate a similar image", + model_alias="test_model", + multi_modal_context=[image_context], + ) + + mock_storage = Mock() + mock_storage.save_base64_image.return_value = "images/generated.png" + stub_resource_provider.artifact_storage.media_storage = mock_storage + + with patch.object( + 
stub_resource_provider.model_registry.get_model.return_value, + "generate_image", + return_value=["base64_image"], + ) as mock_generate: + generator = ImageCellGenerator(config=config, resource_provider=stub_resource_provider) + generator.generate(data={"reference_image": "https://example.com/image.png"}) + + mock_generate.assert_called_once() + context = mock_generate.call_args.kwargs["multi_modal_context"] + assert context is not None + assert context[0]["image_url"] == "https://example.com/image.png"
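
Taken together, these changes enable the chained-column workflow described in the updated tutorial and README: generate an image in one column, then edit it in a follow-up column that receives the generated image through an auto-detecting ImageContext. The sketch below is illustrative only; the import path for ImageColumnConfig, the model alias, and the prompts are assumptions, while the field names mirror the tests above.

    from data_designer.config import ImageContext
    from data_designer.config.column_configs import ImageColumnConfig  # assumed import location

    base_image = ImageColumnConfig(
        name="portrait",
        prompt="A studio portrait photo of a person, natural lighting",  # placeholder prompt
        model_alias="image-generation-model",  # placeholder alias
    )

    edited_image = ImageColumnConfig(
        name="edited_portrait",
        prompt="Edit the provided image so the subject is wearing {{ accessory }}",  # placeholder prompt
        model_alias="image-generation-model",
        # data_type is omitted so auto-detection applies: in preview mode the portrait
        # column holds base64 data, in create mode a relative file path under the run's
        # dataset directory; both are resolved before the request reaches the model.
        multi_modal_context=[ImageContext(column_name="portrait")],
    )

    # Both columns are then added to the Data Designer config builder as in the earlier
    # tutorials; at generation time the engine calls get_contexts() with base_path set to
    # the run's base dataset path, so the editing model receives base64 image data rather
    # than a local file path.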