From 219bdf4da5f5bb70c7a45977d7fc2652da676d4a Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Mon, 14 Jul 2025 09:58:48 -0700
Subject: [PATCH 01/15] start

---
 docs/source/en/_toctree.yml                  | 36 +++++++++++++++----
 docs/source/en/modular_diffusers/overview.md | 38 ++++++++++----------
 src/diffusers/__init__.py                    |  6 ++++
 3 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index b959831111cb..0b9f55316f59 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -112,6 +112,8 @@
   sections:
   - local: modular_diffusers/overview
     title: Overview
+  - local: modular_diffusers/end_to_end_guide
+    title: Quickstart
  - local: modular_diffusers/modular_pipeline
    title: Modular Pipeline
  - local: modular_diffusers/components_manager
@@ -126,12 +128,34 @@
    title: Loop Sequential Pipeline Blocks
  - local: modular_diffusers/auto_pipeline_blocks
    title: Auto Pipeline Blocks
-  - local: modular_diffusers/end_to_end_guide
-    title: End-to-End Example
-
-- title: Training
-  isExpanded: false
-  sections:
+  title: Modular Diffusers
+- sections:
+  - local: using-diffusers/consisid
+    title: ConsisID
+  - local: using-diffusers/sdxl
+    title: Stable Diffusion XL
+  - local: using-diffusers/sdxl_turbo
+    title: SDXL Turbo
+  - local: using-diffusers/kandinsky
+    title: Kandinsky
+  - local: using-diffusers/omnigen
+    title: OmniGen
+  - local: using-diffusers/pag
+    title: PAG
+  - local: using-diffusers/inference_with_lcm
+    title: Latent Consistency Model
+  - local: using-diffusers/shap-e
+    title: Shap-E
+  - local: using-diffusers/diffedit
+    title: DiffEdit
+  - local: using-diffusers/inference_with_tcd_lora
+    title: Trajectory Consistency Distillation-LoRA
+  - local: using-diffusers/svd
+    title: Stable Video Diffusion
+  - local: using-diffusers/marigold_usage
+    title: Marigold Computer Vision
+  title: Specific pipeline examples
+- sections:
  - local: training/overview
    title: Overview
  - local: training/create_dataset
diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md
index 9702cea0633d..c8d2c6755ff2 100644
--- a/docs/source/en/modular_diffusers/overview.md
+++ b/docs/source/en/modular_diffusers/overview.md
@@ -10,33 +10,31 @@
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

-# Getting Started with Modular Diffusers
+# Overview

-<Tip warning={true}>
+> [!WARNING]
+> Modular Diffusers is still under active development and its API may change.

-🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes.
+Modular Diffusers is a unified pipeline system that simplifies your workflow with *pipeline blocks*.

-</Tip>
+- Blocks are reusable and you only create new blocks that are unique to your pipeline.
+- Blocks can be mixed and matched to adapt to or create a pipeline for a specific workflow or multiple workflows.

-With Modular Diffusers, we introduce a unified pipeline system that simplifies how you work with diffusion models. Instead of creating separate pipelines for each task, Modular Diffusers lets you:
+The Modular Diffusers docs are organized as shown below.

-**Write Only What's New**: You won't need to write an entire pipeline from scratch every time you have a new use case. You can create pipeline blocks just for your new workflow's unique aspects and reuse existing blocks for existing functionalities.
+
+## Quickstart
+
-**Assemble Like LEGO®**: You can mix and match between blocks in flexible ways. This allows you to write dedicated blocks unique to specific workflows, and then assemble different blocks into a pipeline that can be used more conveniently for multiple workflows.
+- A [quickstart](./end_to_end_guide) for implementing an example workflow with Modular Diffusers.
+
-Here's how our guides are organized to help you navigate the Modular Diffusers documentation:
+## ModularPipeline
+
-### 🚀 Running Pipelines
-- **[Modular Pipeline Guide](./modular_pipeline.md)** - How to use predefined blocks to build a pipeline and run it
-- **[Components Manager Guide](./components_manager.md)** - How to manage and reuse components across multiple pipelines
+- [ModularPipeline](./modular_pipeline) shows you how to build with pipeline blocks and convert them into an executable [`ModularPipeline`].
+- [ComponentsManager](./components_manager) shows you how to manage and reuse components across multiple pipelines.
+
-### 📚 Creating PipelineBlocks
-- **[Pipeline and Block States](./modular_diffusers_states.md)** - Understanding PipelineState and BlockState
-- **[Pipeline Block](./pipeline_block.md)** - How to write custom PipelineBlocks
-- **[SequentialPipelineBlocks](sequential_pipeline_blocks.md)** - Connecting blocks in sequence
-- **[LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks.md)** - Creating iterative workflows
-- **[AutoPipelineBlocks](./auto_pipeline_blocks.md)** - Conditional block selection
-
-### 🎯 Practical Examples
-- **[End-to-End Example](./end_to_end_guide.md)** - Complete end-to-end examples including sharing your workflow in huggingface hub and deplying UI nodes
+## ModularPipelineBlocks
+
+- [States](./modular_diffusers_states) explains how data is shared and communicated between pipeline blocks and [`ModularPipeline`].
+- [PipelineBlock](./pipeline_block) is the most basic unit of a [`ModularPipeline`] and this guide shows you how to create one.
+- [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`SequentialPipelineBlocks`] and how they connect and work together.
+- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`LoopSequentialPipelineBlocks`].
+- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`AutoPipelineBlocks`].
\ No newline at end of file
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index 1414d0fc690a..c53a980262fc 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -61,6 +61,12 @@
        "is_unidecode_available",
        "logging",
    ],
+    "image_processor": [
+        "VaeImageProcessor",
+    ],
+    "video_processor": [
+        "VideoProcessor",
+    ],
 }

 try:

From 5ffc8de5699391e6f58aa9b033d86f3b2f2f4992 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Tue, 15 Jul 2025 11:09:35 -0700
Subject: [PATCH 02/15] draft

---
 docs/source/en/_toctree.yml                   |   4 +-
 .../en/modular_diffusers/end_to_end_guide.md  | 685 ++++++------------
 docs/source/en/modular_diffusers/overview.md  |   2 +-
 .../source/en/modular_diffusers/quickstart.md | 356 +++++++++
 4 files changed, 587 insertions(+), 460 deletions(-)
 create mode 100644 docs/source/en/modular_diffusers/quickstart.md

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 0b9f55316f59..133f867a4561 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -112,8 +112,10 @@
  sections:
  - local: modular_diffusers/overview
    title: Overview
-  - local: modular_diffusers/end_to_end_guide
+  - local: modular_diffusers/quickstart
    title: Quickstart
+  - local: modular_diffusers/end_to_end_guide
+    title: End-to-end example
  - local: modular_diffusers/modular_pipeline
    title: Modular Pipeline
  - local: modular_diffusers/components_manager
diff --git a/docs/source/en/modular_diffusers/end_to_end_guide.md b/docs/source/en/modular_diffusers/end_to_end_guide.md
index cb7b87552a37..5aea48004444 100644
--- a/docs/source/en/modular_diffusers/end_to_end_guide.md
+++ b/docs/source/en/modular_diffusers/end_to_end_guide.md
@@ -10,212 +10,130 @@
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

-# End-to-End Developer Guide: Building with Modular Diffusers
+# Quickstart

-<Tip warning={true}>
+Modular Diffusers is designed to help developers quickly create new workflows within a modular framework by assembling blocks into a pipeline. The blocks are highly flexible and can be reused or mixed-and-matched in many ways. You only need to create a new block if one doesn't already exist.

-🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes.
+This doc will show you how to implement the [Differential Diffusion](https://differential-diffusion.github.io/) pipeline with the modular framework in 4 steps.

-</Tip>
+1. Identify an existing pipeline that is similar to what you're trying to implement and determine what part of the pipeline needs to be modified.
+2. Build out the pipeline using existing blocks wherever possible and creating placeholders for the new blocks.
+3. Test the pipeline with expected inputs and outputs.
+4. Add custom logic to the blocks you want to change, and test incrementally to make sure it works.

+## Identify required blocks

-In this tutorial we will walk through the process of adding a new pipeline to the modular framework using differential diffusion as our example. We'll cover the complete workflow from implementation to deployment: implementing the new pipeline, ensuring compatibility with existing tools, sharing the code on Hugging Face Hub, and deploying it as a UI node.
+[Differential Diffusion](https://differential-diffusion.github.io/) is an image-to-image workflow that uses a map to determine the amount of change in each image region.
Regions with lower pixel values are replaced with the original noised latents to preserve more of the original image. This is unlike a standard image-to-image workflow, which applies uniform noise to all pixels based on the `strength` parameter.

This means you should start with the preset blocks for an image-to-image pipeline.

```py
from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS

IMAGE2IMAGE_BLOCKS = InsertableDict([
    ("text_encoder", StableDiffusionXLTextEncoderStep),
    ("image_encoder", StableDiffusionXLVaeEncoderStep),
    ("input", StableDiffusionXLInputStep),
    ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
    ("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
    ("prepare_add_cond", StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep),
    ("denoise", StableDiffusionXLDenoiseStep),
    ("decode", StableDiffusionXLDecodeStep)
])
```

The blocks that need to be modified are:

- The `prepare_latents` block to create the change map and pre-compute noised latents for all timesteps.
- The `denoise` block to selectively apply denoising based on the change map. This block is a `LoopSequentialPipelineBlocks`, which means it contains other blocks that it executes in a loop.
-```out -StableDiffusionXLDenoiseStep( - Class: StableDiffusionXLDenoiseLoopWrapper + ```py + denoise_blocks = IMAGE2IMAGE_BLOCKS["denoise"]() + print(denoise_blocks) + ``` - Description: Denoise step that iteratively denoise the latents. - Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: - - `StableDiffusionXLLoopBeforeDenoiser` - - `StableDiffusionXLLoopDenoiser` - - `StableDiffusionXLLoopAfterDenoiser` - This block supports both text2img and img2img tasks. + Print this out to view the docs for it. You'll see that it contains 3 sub-blocks, `before_denoiser`, `denoiser`, and `after_denoiser`. The `before_denoiser` sub-block should be modified to prepare the latent input for the denoiser based on the change map. +All of the other blocks can be reused from the existing image-to-image and text-to-image workflows. You only need to change the `prepare_latents` and `denoise` blocks. - Components: - scheduler (`EulerDiscreteScheduler`) - guider (`ClassifierFreeGuidance`) - unet (`UNet2DConditionModel`) +## Build the pipeline - Sub-Blocks: - [0] before_denoiser (StableDiffusionXLLoopBeforeDenoiser) - Description: step within the denoising loop that prepare the latent input for the denoiser. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `StableDiffusionXLDenoiseLoopWrapper`) - - [1] denoiser (StableDiffusionXLLoopDenoiser) - Description: Step within the denoising loop that denoise the latents with guidance. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `StableDiffusionXLDenoiseLoopWrapper`) - - [2] after_denoiser (StableDiffusionXLLoopAfterDenoiser) - Description: step within the denoising loop that update the latents. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `StableDiffusionXLDenoiseLoopWrapper`) - -) -``` - -Let's compare standard image-to-image and differential diffusion! The key difference in algorithm is that standard image-to-image diffusion applies uniform noise across all pixels based on a single `strength` parameter, but differential diffusion uses a change map where each pixel value determines when that region starts denoising. Regions with lower values get "frozen" earlier by replacing them with noised original latents, preserving more of the original image. - -Therefore, the key differences when it comes to pipeline implementation would be: -1. The `prepare_latents` step (which prepares the change map and pre-computes noised latents for all timesteps) -2. The `denoise` step (which selectively applies denoising based on the change map) -3. Since differential diffusion doesn't use the `strength` parameter, we'll use the text-to-image `set_timesteps` step instead of the image-to-image version - -To implement differntial diffusion, we can reuse most blocks from image-to-image and text-to-image workflows, only modifying the `prepare_latents` step and the first part of the `denoise` step (i.e. `before_denoiser (StableDiffusionXLLoopBeforeDenoiser)`). 
- -Here's a flowchart showing the pipeline structure and the changes we need to make: - - -![DiffDiff Pipeline Structure](https://mermaid.ink/img/pako:eNqVVO9r4kAQ_VeWLQWFKEk00eRDwZpa7Q-ucPfpYpE1mdWlcTdsVmpb-7_fZk1tTCl3J0Sy8968N5kZ9g0nIgUc4pUk-Rr9iuYc6d_Ibs14vlXoQYpNrtqo07lAo1jBTi2AlynysWIa6DJmG7KCBnZpsHHMSqkqNjaxKC5ALRTbQKEgLyosMthVnEvIiYRFRhRwVaBoNpmUT0W7MrTJkUbSdJEInlbwxMDXcQpcsAKq6OH_2mDTODIY4yt0J0ReUaYGnLXiJVChdSsB-enfPhBnhnjT-rCQj-1K_8Ygt62YUAVy8Ykf4FvU6XYu9rpuIGqPpvXSzs_RVEj2KrgiGUp02zNQTHBEM_FcK3BfQbBHd7qAst-PxvW-9WOrypnNylG0G9oRUMYBFeolg-IQTTJSFDqOUkZp-fwsQURZloVnlPpLf2kVSoonCM-SwCUuqY6dZ5aqddjLd1YiMiFLNrWorrxj9EOmP4El37lsl_9p5PzFqIqwVwgdN981fDM94bphH5I06R8NXZ_4QcPQPTFs6JltPrS6JssFhw9N817l27bdyM-lSKAo6iVBAAnQY0n9wLO9wbcluY7ruUFDtdguH74K0yENKDkK-8nAG6TfNrfy_bf-HjdrlOfZS7VYSAlU5JAwyhLE9WrWVw1dWdPTXauDsy8LUkdHtnX_pfMnBOvSGluRNbGurbuTHtdZN9Zts1MljC19_7EUh0puwcIbkBtSHvFbic6xWsMG5jjUrymRT3M85-86Jyf8txCbjzQptqs1DinJCn3a5qm-viJG9M26OUYlcH0_jsWWKxwGttHA4Rve4dD1el3H8_yh49hD3_X7roVfcNhx-l3b14PxvGHQ0xMa9t4t_Gp8na7tDvu-4w08HXecweD9D4X54ZI) - - -### Build a Working Pipeline Structure - -ok now we've identified the blocks to modify, let's build the pipeline skeleton first - at this stage, our goal is to get the pipeline struture working end-to-end (even though it's just doing the img2img behavior). I would simply create placeholder blocks by copying from existing ones: +Once you've identified which blocks you need, create the pipeline. You don't need to modify the blocks yet. Create a placeholder block by copying the existing ones. ```py ->>> # Copy existing blocks as placeholders ->>> class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): -... """Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later""" -... # ... same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep -... ->>> class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): -... """Copied from StableDiffusionXLLoopBeforeDenoiser - will modify later""" -... # ... same implementation as StableDiffusionXLLoopBeforeDenoiser +class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): + """Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later""" + # ... same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep ``` -`SDXLDiffDiffLoopBeforeDenoiser` is the be part of the denoise loop we need to change. Let's use it to assemble a `SDXLDiffDiffDenoiseStep`. +Assemble the denoise block for Differential Diffusion. Replace the `StableDiffusionXLLoopBeforeDenoiser` with `SDXLDiffDiffLoopBeforeDenoiser` because that is the only sub-block that needs modifying. ```py ->>> class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): -... block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLLoopDenoiser, StableDiffusionXLLoopAfterDenoiser] -... block_names = ["before_denoiser", "denoiser", "after_denoiser"] +class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): + block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLLoopDenoiser, StableDiffusionXLLoopAfterDenoiser] + block_names = ["before_denoiser", "denoiser", "after_denoiser"] ``` -Now we can put together our differential diffusion pipeline. +Now put the whole pipeline together. Copy the existing blocks and set the `prepare_latents` and `denoise` blocks to the ones you just created. + +> [!TIP] +> The `set_timesteps` block is reused from the text-to-image workflow because Differential Diffusion doesn't use the `strength` parameter. 
```py ->>> DIFFDIFF_BLOCKS = IMAGE2IMAGE_BLOCKS.copy() ->>> DIFFDIFF_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"] ->>> DIFFDIFF_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep ->>> DIFFDIFF_BLOCKS["denoise"] = SDXLDiffDiffDenoiseStep ->>> ->>> dd_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_BLOCKS) ->>> print(dd_blocks) ->>> # At this point, the pipeline works exactly like img2img since our blocks are just copies +DIFFDIFF_BLOCKS = IMAGE2IMAGE_BLOCKS.copy() +DIFFDIFF_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"] +DIFFDIFF_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep +DIFFDIFF_BLOCKS["denoise"] = SDXLDiffDiffDenoiseStep + +dd_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_BLOCKS) +print(dd_blocks) ``` -### Set up an example +## Test example -ok, so now our blocks should be able to compile without an error, we can move on to the next step. Let's setup a simple example so we can run the pipeline as we build it. diff-diff use same model checkpoints as SDXL so we can fetch the models from a regular SDXL repo. +The pipeline so far is a collection of blocks that specify how it should work. To add components like a UNet, call `init_pipeline` to initialize a [`ModularPipeline`] with the expected component specifications. Then call [`~ModularPipeline.load_default_components`] to load the model components into the pipeline. ```py ->>> dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") ->>> dd_pipeline.load_default_componenets(torch_dtype=torch.float16) ->>> dd_pipeline.to("cuda") +dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") +dd_pipeline.load_default_componenets(torch_dtype=torch.float16) +dd_pipeline.to("cuda") ``` -We will use this example script: +Test the pipeline with a basic example. The [`ModularPipeline`] is nearly identical to a standard [`DiffusionPipeline`] except you should specify the expected output type to `"images"`. ```py ->>> image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") ->>> mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") ->>> ->>> prompt = "a green pear" ->>> negative_prompt = "blurry" ->>> ->>> image = dd_pipeline( -... prompt=prompt, -... negative_prompt=negative_prompt, -... num_inference_steps=25, -... diffdiff_map=mask, -... image=image, -... output="images" -... )[0] ->>> ->>> image.save("diffdiff_out.png") +image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") + +prompt = "a green pear" +negative_prompt = "blurry" + +image = dd_pipeline( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=25, + diffdiff_map=mask, + image=image, + output="images" +)[0] + +image.save("diffdiff_out.png") ``` -If you run the script right now, you will get a complaint about unexpected input `diffdiff_map`. -and you would get the same result as the original img2img pipeline. +At this point, the [`ModularPipeline`] behaves exactly like a standard image-to-image pipeline. 
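Before adding the custom Differential Diffusion logic, it can help to confirm what the assembled pipeline currently expects. A minimal check (a sketch, assuming the `dd_pipeline` built above):

```py
# `doc` lists the compiled inputs and outputs of the built pipeline, so you can
# verify that `diffdiff_map` is not yet a recognized input before implementing it.
print(dd_pipeline.doc)
```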
-### implement your custom logic and test incrementally +## Implement custom logic -Let's modify the pipeline so that we can get expected result with this example script. +Now that the pipeline works, start implementing the required custom logic for Differential Diffusion. -We'll start with the `prepare_latents` step. The main changes are: -- Requires a new user input `diffdiff_map` -- Requires new component `mask_processor` to process the `diffdiff_map` -- Requires new intermediate inputs: - - Need `timestep` instead of `latent_timestep` to precompute all the latents - - Need `num_inference_steps` to create the `diffdiff_masks` -- create a new output `diffdiff_masks` and `original_latents` +### prepare_latents - +> [!TIP] +> Use `print(dd_pipeline.doc)` to check the inputs and outputs of the pipeline and add `print(state)` and `print(block_state)` statements inside the `__call__` method to inspect the intermediate results. +> This helps understand what is happening at each stage of the pipeline and is useful for debugging. -💡 use `print(dd_pipeline.doc)` to check compiled inputs and outputs of the built piepline. +The `prepare_latents` block requires a processor to process the change map. -e.g. after we added `diffdiff_map` as an input in this step, we can run `print(dd_pipeline.doc)` to verify that it shows up in the docstring as a user input. - - - -Once we make sure all the variables we need are available in the block state, we can implement the diff-diff logic inside `__call__`. We created 2 new variables: the change map `diffdiff_mask` and the pre-computed noised latents for all timesteps `original_latents`. - - - -💡 Implement incrementally! Run the example script as you go, and insert `print(state)` and `print(block_state)` everywhere inside the `__call__` method to inspect the intermediate results. This helps you understand what's going on and what each line you just added does. - - - -Here are the key changes we made to implement differential diffusion: - -**1. Modified `prepare_latents` step:** ```diff class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): @property @@ -225,7 +143,14 @@ class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): ComponentSpec("scheduler", EulerDiscreteScheduler), + ComponentSpec("mask_processor", VaeImageProcessor, config=FrozenDict({"do_normalize": False, "do_convert_grayscale": True})) ] +``` + +The `prepare_latents` block requires two new inputs: + +- a change map (`diffdiff_map`) +- a `timestep` for precomputing all the latents and `num_inference_steps` to create the mask for updating the image regions +```diff @property def inputs(self) -> List[Tuple[str, Any]]: return [ @@ -247,7 +172,11 @@ class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): + OutputParam("original_latents", type_hint=torch.Tensor), + OutputParam("diffdiff_masks", type_hint=torch.Tensor), ] +``` + +The `prepare_latents` block should output the `diffdiff_masks` and `original_latents`. Add the custom logic in the `__call__` method. +```diff def __call__(self, components, state: PipelineState): # ... existing logic ... + # Process change map and create masks @@ -257,7 +186,13 @@ class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): + block_state.original_latents = block_state.latents ``` -**2. 
Modified `before_denoiser` step:** +### before_denoiser + +The `before_denoiser` sub-block requires three new inputs: + +- a `denoising_start` parameter +- the `diffdiff_masks` and `original_latents` outputs from `prepare_latents` are now inputs to `before_denoiser` + ```diff class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): @property @@ -279,7 +214,11 @@ class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): + InputParam("original_latents", type_hint=torch.Tensor), + InputParam("diffdiff_masks", type_hint=torch.Tensor), ] +``` + +Add the custom logic in the `__call__` method. +```diff def __call__(self, components, block_state, i, t): + # Apply differential diffusion logic + if i == 0 and block_state.denoising_start is None: @@ -291,358 +230,188 @@ class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): # ... rest of existing logic ... ``` -That's all there is to it! We've just created a simple sequential pipeline by mix-and-match some existing and new pipeline blocks. - -Now we use the process we've prepred in step2 to build the pipeline and inspect it. - +Run the pipeline again and you should get the image below. ```py ->> dd_pipeline -SequentialPipelineBlocks( - Class: ModularPipelineBlocks - - Description: - - - Components: - text_encoder (`CLIPTextModel`) - text_encoder_2 (`CLIPTextModelWithProjection`) - tokenizer (`CLIPTokenizer`) - tokenizer_2 (`CLIPTokenizer`) - guider (`ClassifierFreeGuidance`) - vae (`AutoencoderKL`) - image_processor (`VaeImageProcessor`) - scheduler (`EulerDiscreteScheduler`) - mask_processor (`VaeImageProcessor`) - unet (`UNet2DConditionModel`) - - Configs: - force_zeros_for_empty_prompt (default: True) - requires_aesthetics_score (default: False) - - Blocks: - [0] text_encoder (StableDiffusionXLTextEncoderStep) - Description: Text Encoder step that generate text_embeddings to guide the image generation - - [1] image_encoder (StableDiffusionXLVaeEncoderStep) - Description: Vae Encoder step that encode the input image into a latent representation - - [2] input (StableDiffusionXLInputStep) - Description: Input processing step that: - 1. Determines `batch_size` and `dtype` based on `prompt_embeds` - 2. Adjusts input tensor shapes based on `batch_size` (number of prompts) and `num_images_per_prompt` - - All input tensors are expected to have either batch_size=1 or match the batch_size - of prompt_embeds. The tensors will be duplicated across the batch dimension to - have a final batch_size of batch_size * num_images_per_prompt. - - [3] set_timesteps (StableDiffusionXLSetTimestepsStep) - Description: Step that sets the scheduler's timesteps for inference - - [4] prepare_latents (SDXLDiffDiffPrepareLatentsStep) - Description: Step that prepares the latents for the differential diffusion generation process - - [5] prepare_add_cond (StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep) - Description: Step that prepares the additional conditioning for the image-to-image/inpainting generation process - - [6] denoise (SDXLDiffDiffDenoiseStep) - Description: Pipeline block that iteratively denoise the latents over `timesteps`. 
The specific steps with each iteration can be customized with `sub_blocks` attributes - - [7] decode (StableDiffusionXLDecodeStep) - Description: Step that decodes the denoised latents into images - -) +image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") + +prompt = "a green pear" +negative_prompt = "blurry" + +image = dd_pipeline( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=25, + diffdiff_map=mask, + image=image, + output="images" +)[0] + +image.save("diffdiff_out.png") ``` -Run the example now, you should see an apple with its right half transformed into a green pear. +
+<div class="flex justify-center">
+  <img src="https://cdn-uploads.huggingface.co/production/uploads/624ef9ba9d608e459387b34e/4zqJOz-35Q0i6jyUW3liL.png"/>
+</div>
-![Image description](https://cdn-uploads.huggingface.co/production/uploads/624ef9ba9d608e459387b34e/4zqJOz-35Q0i6jyUW3liL.png) +## Adding blocks +You can add blocks to a [`ModularPipeline`] to support additional features without rewriting the pipeline from scratch. -## Adding IP-adapter +This section demonstrates how to add an IP-Adapter or ControlNet. -We provide an auto IP-adapter block that you can plug-and-play into your modular workflow. It's an `AutoPipelineBlocks`, so it will only run when the user passes an IP adapter image. In this tutorial, we'll focus on how to package it into your differential diffusion workflow. To learn more about `AutoPipelineBlocks`, see [here](./auto_pipeline_blocks.md) +### IP-Adapter -We talked about how to add IP-adapter into your workflow in the [Modular Pipeline Guide](./modular_pipeline.md). Let's just go ahead to create the IP-adapter block. +Stable Diffusion XL already has a preset IP-Adapter block that you can use and doesn't require any changes to the existing Differential Diffusion pipeline. ```py ->>> from diffusers.modular_pipelines.stable_diffusion_xl.encoders import StableDiffusionXLAutoIPAdapterStep ->>> ip_adapter_block = StableDiffusionXLAutoIPAdapterStep() +from diffusers.modular_pipelines.stable_diffusion_xl.encoders import StableDiffusionXLAutoIPAdapterStep + +ip_adapter_block = StableDiffusionXLAutoIPAdapterStep() ``` -We can directly add the ip-adapter block instance to the `diffdiff_blocks` that we created before. The `sub_blocks` attribute is a `InsertableDict`, so we're able to insert the it at specific position (index `0` here). +Use the [`sub_blocks.insert`] method to insert it into the [`ModularPipeline`]. The example below inserts the `ip_adapter_block` at position `0`. Print the pipeline to see that the `ip_adapter_block` is added and it requires an `ip_adapter_image`. This also added two components to the pipeline, the `image_encoder` and `feature_extractor`. ```py ->>> dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0) +dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0) ``` -Take a look at the new diff-diff pipeline with ip-adapter! +Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_default_components`] to load the model components. Load and set the IP-Adapter to run the pipeline. 
```py ->>> print(dd_blocks) +dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") +dd_pipeline.load_default_components(torch_dtype=torch.float16) +dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin") +dd_pipeline.loader.set_ip_adapter_scale(0.6) +dd_pipeline = dd_pipeline.to(device) + +ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg") +image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") + +prompt = "a green pear" +negative_prompt = "blurry" +generator = torch.Generator(device=device).manual_seed(42) + +image = dd_pipeline( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=25, + generator=generator, + ip_adapter_image=ip_adapter_image, + diffdiff_map=mask, + image=image, + output="images" +)[0] ``` -The pipeline now lists ip-adapter as its first block, and tells you that it will run only if `ip_adapter_image` is provided. It also includes the two new components from ip-adpater: `image_encoder` and `feature_extractor` - -```out -SequentialPipelineBlocks( - Class: ModularPipelineBlocks - - ==================================================================================================== - This pipeline contains blocks that are selected at runtime based on inputs. - Trigger Inputs: {'ip_adapter_image'} - Use `get_execution_blocks()` with input names to see selected blocks (e.g. `get_execution_blocks('ip_adapter_image')`). - ==================================================================================================== - - - Description: +### ControlNet +Stable Diffusion XL already has a preset ControlNet block that can readily be used. - Components: - image_encoder (`CLIPVisionModelWithProjection`) - feature_extractor (`CLIPImageProcessor`) - unet (`UNet2DConditionModel`) - guider (`ClassifierFreeGuidance`) - text_encoder (`CLIPTextModel`) - text_encoder_2 (`CLIPTextModelWithProjection`) - tokenizer (`CLIPTokenizer`) - tokenizer_2 (`CLIPTokenizer`) - vae (`AutoencoderKL`) - image_processor (`VaeImageProcessor`) - scheduler (`EulerDiscreteScheduler`) - mask_processor (`VaeImageProcessor`) - - Configs: - force_zeros_for_empty_prompt (default: True) - requires_aesthetics_score (default: False) - - Blocks: - [0] ip_adapter (StableDiffusionXLAutoIPAdapterStep) - Description: Run IP Adapter step if `ip_adapter_image` is provided. - - [1] text_encoder (StableDiffusionXLTextEncoderStep) - Description: Text Encoder step that generate text_embeddings to guide the image generation - - [2] image_encoder (StableDiffusionXLVaeEncoderStep) - Description: Vae Encoder step that encode the input image into a latent representation - - [3] input (StableDiffusionXLInputStep) - Description: Input processing step that: - 1. Determines `batch_size` and `dtype` based on `prompt_embeds` - 2. Adjusts input tensor shapes based on `batch_size` (number of prompts) and `num_images_per_prompt` - - All input tensors are expected to have either batch_size=1 or match the batch_size - of prompt_embeds. The tensors will be duplicated across the batch dimension to - have a final batch_size of batch_size * num_images_per_prompt. 
- - [4] set_timesteps (StableDiffusionXLSetTimestepsStep) - Description: Step that sets the scheduler's timesteps for inference - - [5] prepare_latents (SDXLDiffDiffPrepareLatentsStep) - Description: Step that prepares the latents for the differential diffusion generation process - - [6] prepare_add_cond (StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep) - Description: Step that prepares the additional conditioning for the image-to-image/inpainting generation process - - [7] denoise (SDXLDiffDiffDenoiseStep) - Description: Pipeline block that iteratively denoise the latents over `timesteps`. The specific steps with each iteration can be customized with `sub_blocks` attributes - - [8] decode (StableDiffusionXLDecodeStep) - Description: Step that decodes the denoised latents into images +```py +from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep -) +control_input_block = StableDiffusionXLAutoControlNetInputStep() ``` -Let's test it out. We used an orange image to condition the generation via ip-addapter and we can see a slight orange color and texture in the final output. +However, it requires modifying the `denoise` block because that's where the ControlNet injects the control information into the UNet. +Modify the `denoise` block by replacing the `StableDiffusionXLLoopDenoiser` sub-block with the `StableDiffusionXLControlNetLoopDenoiser`. ```py ->>> ip_adapter_block = StableDiffusionXLAutoIPAdapterStep() ->>> dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0) ->>> ->>> dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") ->>> dd_pipeline.load_default_components(torch_dtype=torch.float16) ->>> dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin") ->>> dd_pipeline.loader.set_ip_adapter_scale(0.6) ->>> dd_pipeline = dd_pipeline.to(device) ->>> ->>> ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg") ->>> image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") ->>> mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") ->>> ->>> prompt = "a green pear" ->>> negative_prompt = "blurry" ->>> generator = torch.Generator(device=device).manual_seed(42) ->>> ->>> image = dd_pipeline( -... prompt=prompt, -... negative_prompt=negative_prompt, -... num_inference_steps=25, -... generator=generator, -... ip_adapter_image=ip_adapter_image, -... diffdiff_map=mask, -... image=image, -... output="images" -... )[0] -``` - -## Working with ControlNets +class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): + block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLControlNetLoopDenoiser, StableDiffusionXLDenoiseLoopAfterDenoiser] + block_names = ["before_denoiser", "denoiser", "after_denoiser"] -What about controlnet? Can differential diffusion work with controlnet? The key differences between a regular pipeline and a ControlNet pipeline are: -1. A ControlNet input step that prepares the control condition -2. 
Inside the denoising loop, a modified denoiser step where the control image is first processed through ControlNet, then control information is injected into the UNet - -From looking at the code workflow: differential diffusion only modifies the "before denoiser" step, while ControlNet operates within the "denoiser" itself. Since they intervene at different points in the pipeline, they should work together without conflicts. - -Intuitively, these two techniques are orthogonal and should combine naturally: differential diffusion controls how much the inference process can deviate from the original in each region, while ControlNet controls in what direction that change occurs. - -With this understanding, let's assemble the diffdiff-controlnet loop by combining the diffdiff before-denoiser step and controlnet denoiser step. - -```py ->>> class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): -... block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLControlNetLoopDenoiser, StableDiffusionXLDenoiseLoopAfterDenoiser] -... block_names = ["before_denoiser", "denoiser", "after_denoiser"] ->>> ->>> controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep() ->>> # print(controlnet_denoise) +controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep() ``` -We provide a auto controlnet input block that you can directly put into your workflow to proceess the `control_image`: similar to auto ip-adapter block, this step will only run if `control_image` input is passed from user. It work with both controlnet and controlnet union. - +Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and [`~ModularPipeline.load_default_components`] into it. ```py ->>> from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep ->>> control_input_block = StableDiffusionXLAutoControlNetInputStep() ->>> print(control_input_block) +dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7) +dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block + +dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") +dd_pipeline.load_default_components(torch_dtype=torch.float16) +dd_pipeline = dd_pipeline.to(device) + +control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg") +image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") + +prompt = "a green pear" +negative_prompt = "blurry" +generator = torch.Generator(device=device).manual_seed(42) + +image = dd_pipeline( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=25, + generator=generator, + control_image=control_image, + controlnet_conditioning_scale=0.5, + diffdiff_map=mask, + image=image, + output="images" +)[0] ``` -```out -StableDiffusionXLAutoControlNetInputStep( - Class: AutoPipelineBlocks - - ==================================================================================================== - This pipeline contains blocks that are selected at runtime based on inputs. 
- Trigger Inputs: ['control_image', 'control_mode'] - ==================================================================================================== - - - Description: Controlnet Input step that prepare the controlnet input. - This is an auto pipeline block that works for both controlnet and controlnet_union. - (it should be called right before the denoise step) - `StableDiffusionXLControlNetUnionInputStep` is called to prepare the controlnet input when `control_mode` and `control_image` are provided. - - `StableDiffusionXLControlNetInputStep` is called to prepare the controlnet input when `control_image` is provided. - if neither `control_mode` nor `control_image` is provided, step will be skipped. - - - Components: - controlnet (`ControlNetUnionModel`) - control_image_processor (`VaeImageProcessor`) - - Sub-Blocks: - • controlnet_union [trigger: control_mode] (StableDiffusionXLControlNetUnionInputStep) - Description: step that prepares inputs for the ControlNetUnion model - - • controlnet [trigger: control_image] (StableDiffusionXLControlNetInputStep) - Description: step that prepare inputs for controlnet +### AutoPipelineBlocks and SequentialPipelineBlocks -) +It is possible to combine the `SDXLDiffDiffControlNetDenoiseStep` and `SDXLDiffDiffDenoiseStep` into a [`AutoPipelineBlocks`]. This is a collection of blocks that automatically determines which block to run based on the input. -``` - -Let's assemble the blocks and run an example using controlnet + differential diffusion. We used a tomato as `control_image`, so you can see that in the output, the right half that transformed into a pear had a tomato-like shape. +For example, use `block_trigger_inputs` to only run the `SDXLDiffDiffControlNetDenoiseStep` block if a `control_image` input is provided. Otherwise, the `SDXLDiffDiffDenoiseStep` is used. ```py ->>> dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7) ->>> dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block ->>> ->>> dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") ->>> dd_pipeline.load_default_components(torch_dtype=torch.float16) ->>> dd_pipeline = dd_pipeline.to(device) ->>> ->>> control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg") ->>> image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") ->>> mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") ->>> ->>> prompt = "a green pear" ->>> negative_prompt = "blurry" ->>> generator = torch.Generator(device=device).manual_seed(42) ->>> ->>> image = dd_pipeline( -... prompt=prompt, -... negative_prompt=negative_prompt, -... num_inference_steps=25, -... generator=generator, -... control_image=control_image, -... controlnet_conditioning_scale=0.5, -... diffdiff_map=mask, -... image=image, -... output="images" -... )[0] +class SDXLDiffDiffAutoDenoiseStep(AutoPipelineBlocks): + block_classes = [SDXLDiffDiffControlNetDenoiseStep, SDXLDiffDiffDenoiseStep] + block_names = ["controlnet_denoise", "denoise"] + block_trigger_inputs = ["controlnet_cond", None] ``` -Optionally, We can combine `SDXLDiffDiffControlNetDenoiseStep` and `SDXLDiffDiffDenoiseStep` into a `AutoPipelineBlocks` so that same workflow can work with or without controlnet. 
- +This gives you the ability to package several workflows into a single [`ModularPipeline`]. Add the `ip_adapter` and `controlnet_input` blocks. ```py ->>> class SDXLDiffDiffAutoDenoiseStep(AutoPipelineBlocks): -... block_classes = [SDXLDiffDiffControlNetDenoiseStep, SDXLDiffDiffDenoiseStep] -... block_names = ["controlnet_denoise", "denoise"] -... block_trigger_inputs = ["controlnet_cond", None] +DIFFDIFF_AUTO_BLOCKS = IMAGE2IMAGE_BLOCKS.copy() +DIFFDIFF_AUTO_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep +DIFFDIFF_AUTO_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"] +DIFFDIFF_AUTO_BLOCKS["denoise"] = SDXLDiffDiffAutoDenoiseStep +DIFFDIFF_AUTO_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0) +DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input",StableDiffusionXLControlNetAutoInput, 7) ``` -`SDXLDiffDiffAutoDenoiseStep` will run the ControlNet denoise step if `control_image` input is provided, otherwise it will run the regular denoise step. - - +Call [`SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`]. This is a sequence of blocks that are chained together and run one after the other if applicable. - Note that it's perfectly fine not to use `AutoPipelineBlocks`. In fact, we recommend only using `AutoPipelineBlocks` to package your workflow at the end once you've verified all your pipelines work as expected. - - - -Now you can create the differential diffusion preset that works with ip-adapter & controlnet. +Create a [`ModularPipeline`] and load in the model components to run. ```py ->>> DIFFDIFF_AUTO_BLOCKS = IMAGE2IMAGE_BLOCKS.copy() ->>> DIFFDIFF_AUTO_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep ->>> DIFFDIFF_AUTO_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"] ->>> DIFFDIFF_AUTO_BLOCKS["denoise"] = SDXLDiffDiffAutoDenoiseStep ->>> DIFFDIFF_AUTO_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0) ->>> DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input",StableDiffusionXLControlNetAutoInput, 7) ->>> ->>> print(DIFFDIFF_AUTO_BLOCKS) +dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS) +dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") +dd_pipeline.load_default_components(torch_dtype=torch.float16) ``` -to use +## Share -```py ->>> dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS) ->>> dd_pipeline = dd_auto_blocks.init_pipeline(...) -``` -## Creating a Modular Repo - -You can easily share your differential diffusion workflow on the Hub by creating a modular repo. This is one created using the code we just wrote together: https://huggingface.co/YiYiXu/modular-diffdiff - -To create a Modular Repo and share on hub, you just need to run `save_pretrained()` along with the `push_to_hub=True` flag. Note that if your pipeline contains custom block, you need to manually upload the code to the hub. But we are working on a command line tool to help you upload it very easily. +Add your [`ModularPipeline`] to the Hub with [`~ModularPipeline.save_pretrained`] and setting the `push_to_hub` argument to `True`. ```py dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True) ``` -With a modular repo, it is very easy for the community to use the workflow you just created! Here is an example to use the differential-diffusion pipeline we just created and shared. +Other users can load the [`ModularPipeline`] with [`~ModularPipeline.from_pretrained`]. 
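For example, a minimal sketch (assuming the `YiYiXu/test_modular_doc` repo saved above; `trust_remote_code=True` is needed because the repo also ships the custom block code):

```py
import torch
from diffusers.modular_pipelines import ModularPipeline

# Download the workflow definition and custom blocks from the Hub.
pipeline = ModularPipeline.from_pretrained("YiYiXu/test_modular_doc", trust_remote_code=True)
pipeline.load_default_components(torch_dtype=torch.float16)
```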
-```py ->>> from diffusers.modular_pipelines import ModularPipeline, ComponentsManager ->>> import torch ->>> from diffusers.utils import load_image ->>> ->>> repo_id = "YiYiXu/modular-diffdiff-0704" ->>> ->>> components = ComponentsManager() ->>> ->>> diffdiff_pipeline = ModularPipeline.from_pretrained(repo_id, trust_remote_code=True, components_manager=components, collection="diffdiff") ->>> diffdiff_pipeline.load_default_components(torch_dtype=torch.float16) ->>> components.enable_auto_cpu_offload() -``` +It is a good idea to use the [`ComponentManager`] with the pipeline to help you manage the many different components of a pipeline. Once you call [`~ModularPipeline.load_default_components`], the components are registered to the [`ComponentManager`]. -see more usage example on model card. +```py +import torch +from diffusers.utils import load_image +from diffusers.modular_pipelines import ModularPipeline, ComponentsManager -## deploy a mellon node +components = ComponentsManager() -[YIYI TODO: for now, here is an example of mellon node https://huggingface.co/YiYiXu/diff-diff-mellon] +diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff") +diffdiff_pipeline.load_default_components(torch_dtype=torch.float16) +``` \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md index c8d2c6755ff2..6f8e00f8a4dd 100644 --- a/docs/source/en/modular_diffusers/overview.md +++ b/docs/source/en/modular_diffusers/overview.md @@ -24,7 +24,7 @@ The Modular Diffusers docs are organized as shown below. ## Quickstart -- A [quickstart](./end_to_end_guide) start for implementing an example workflow with Modular Diffusers. +- A [quickstart](./quickstart) start for implementing an example workflow with Modular Diffusers. ## ModularPipeline diff --git a/docs/source/en/modular_diffusers/quickstart.md b/docs/source/en/modular_diffusers/quickstart.md new file mode 100644 index 000000000000..664699680859 --- /dev/null +++ b/docs/source/en/modular_diffusers/quickstart.md @@ -0,0 +1,356 @@ + + +# Quickstart + +Modular Diffusers is a framework for quickly building flexible and customizable pipelines. At the core of Modular Diffusers are [`ModularPipelineBlocks`] that can be combined with other blocks to adapt to new workflows. The blocks are converted into a [`ModularPipeline`], a friendly user-facing interface developers can use. + +This doc will show you how to implement a [Differential Diffusion](https://differential-diffusion.github.io/) pipeline with the modular framework. + +## ModularPipelineBlocks + +[`ModularPipelineBlocks`] are *definitions* that specify the components, inputs, outputs, and computation logic for a single step in a pipeline. There are four types of blocks. + +- [`PipelineBlock`] is the most basic block for a single step. +- [`SequentialPipelineBlocks`] is a multi-block that composes other blocks linearly. The outputs of one block are the inputs to the next block. +- [`LoopSequentialPipelineBlocks`] is a multi-block that runs iteratively and is designed for iterative workflows. +- [`AutoPipelineBlocks`] is a collection of blocks for different workflows and it selects which block to run based on the input. It is designed to conveniently package multiple workflows into a single pipeline. + +[Differential Diffusion](https://differential-diffusion.github.io/) is an image-to-image workflow. 
Start with the `IMAGE2IMAGE_BLOCKS` preset, a collection of `ModularPipelineBlocks` for image-to-image generation.

```py
from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS
IMAGE2IMAGE_BLOCKS = InsertableDict([
    ("text_encoder", StableDiffusionXLTextEncoderStep),
    ("image_encoder", StableDiffusionXLVaeEncoderStep),
    ("input", StableDiffusionXLInputStep),
    ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
    ("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
    ("prepare_add_cond", StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep),
    ("denoise", StableDiffusionXLDenoiseStep),
    ("decode", StableDiffusionXLDecodeStep)
])
```

## Pipeline and block states

Modular Diffusers uses *state* to communicate data between blocks. There are two types of states.

- [`PipelineState`] is a global state with `inputs` provided by the user and `intermediates` (inputs and outputs). The `intermediates` are passed between blocks and can change whereas the `inputs` can't.
- [`BlockState`] is a local view of relevant variables from [`PipelineState`] for an individual block.

## Customizing blocks

[Differential Diffusion](https://differential-diffusion.github.io/) differs from standard image-to-image in its `prepare_latents` and `denoise` blocks. All the other blocks can be reused, but you'll need to modify these two.

Create placeholder `PipelineBlocks` for `prepare_latents` and `denoise` by copying and modifying the existing ones.

Print the `denoise` block to see that it is a [`LoopSequentialPipelineBlocks`] with three sub-blocks, `before_denoiser`, `denoiser`, and `after_denoiser`. Only the `before_denoiser` sub-block needs to be modified to prepare the latent input for the denoiser based on the change map.

```py
denoise_blocks = IMAGE2IMAGE_BLOCKS["denoise"]()
print(denoise_blocks)
```

Replace the `StableDiffusionXLLoopBeforeDenoiser` sub-block with the new `SDXLDiffDiffLoopBeforeDenoiser` block.

```py
# Copy existing blocks as placeholders
class SDXLDiffDiffPrepareLatentsStep(PipelineBlock):
    """Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later"""
    # ... same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep

class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
    block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLLoopDenoiser, StableDiffusionXLLoopAfterDenoiser]
    block_names = ["before_denoiser", "denoiser", "after_denoiser"]
```

### prepare_latents

The `prepare_latents` block requires the following changes.
+ +- a processor to process the change map +- a new `inputs` to accept the user-provided change map +- two new `intermediate_inputs`, `timestep` for precomputing all the latents and `num_inference_steps` to create the mask for updating the image regions +- update the computation in the `__call__` method for processing the change map and creating the masks, and storing it in the [`BlockState`] + +```diff +class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): + @property + def expected_components(self) -> List[ComponentSpec]: + return [ + ComponentSpec("vae", AutoencoderKL), + ComponentSpec("scheduler", EulerDiscreteScheduler), ++ ComponentSpec("mask_processor", VaeImageProcessor, config=FrozenDict({"do_normalize": False, "do_convert_grayscale": True})) + ] + @property + def inputs(self) -> List[Tuple[str, Any]]: + return [ ++ InputParam("diffdiff_map", required=True), + ] + + @property + def intermediate_inputs(self) -> List[InputParam]: + return [ + InputParam("generator"), +- InputParam("latent_timestep", required=True, type_hint=torch.Tensor), ++ InputParam("timesteps", type_hint=torch.Tensor), ++ InputParam("num_inference_steps", type_hint=int), + ] + + @property + def intermediate_outputs(self) -> List[OutputParam]: + return [ ++ OutputParam("original_latents", type_hint=torch.Tensor), ++ OutputParam("diffdiff_masks", type_hint=torch.Tensor), + ] + def __call__(self, components, state: PipelineState): + # ... existing logic ... ++ # Process change map and create masks ++ diffdiff_map = components.mask_processor.preprocess(block_state.diffdiff_map, height=latent_height, width=latent_width) ++ thresholds = torch.arange(block_state.num_inference_steps, dtype=diffdiff_map.dtype) / block_state.num_inference_steps ++ block_state.diffdiff_masks = diffdiff_map > (thresholds + (block_state.denoising_start or 0)) ++ block_state.original_latents = block_state.latents +``` + +### denoise + +The `before_denoiser` sub-block requires the following changes. + +- a new `inputs` to accept a `denoising_start` parameter +- two new `intermediate_inputs` to accept the `original_latents` and `diffdiff_masks` from the `prepare_latents` block +- update the computation in the `__call__` method for applying Differential Diffusion + +```diff +class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): + @property + def description(self) -> str: + return ( + "Step within the denoising loop for differential diffusion that prepare the latent input for the denoiser" + ) + ++ @property ++ def inputs(self) -> List[Tuple[str, Any]]: ++ return [ ++ InputParam("denoising_start"), ++ ] + + @property + def intermediate_inputs(self) -> List[str]: + return [ + InputParam("latents", required=True, type_hint=torch.Tensor), ++ InputParam("original_latents", type_hint=torch.Tensor), ++ InputParam("diffdiff_masks", type_hint=torch.Tensor), + ] + + def __call__(self, components, block_state, i, t): ++ # Apply differential diffusion logic ++ if i == 0 and block_state.denoising_start is None: ++ block_state.latents = block_state.original_latents[:1] ++ else: ++ block_state.mask = block_state.diffdiff_masks[i].unsqueeze(0).unsqueeze(1) ++ block_state.latents = block_state.original_latents[i] * block_state.mask + block_state.latents * (1 - block_state.mask) + + # ... rest of existing logic ... +``` + +## Assembling the blocks + +You should have all the blocks you need at this point to create a [`ModularPipeline`]. 
+
+Copy the existing `IMAGE2IMAGE_BLOCKS` preset, and for the `set_timesteps` block, use the `set_timesteps` block from `TEXT2IMAGE_BLOCKS` because Differential Diffusion doesn't require a `strength` parameter.
+
+Set the `prepare_latents` and `denoise` blocks to the `SDXLDiffDiffPrepareLatentsStep` and `SDXLDiffDiffDenoiseStep` blocks you just modified.
+
+Call [`SequentialPipelineBlocks.from_blocks_dict`] on the blocks to create a `SequentialPipelineBlocks`.
+
+```py
+DIFFDIFF_BLOCKS = IMAGE2IMAGE_BLOCKS.copy()
+DIFFDIFF_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"]
+DIFFDIFF_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep
+DIFFDIFF_BLOCKS["denoise"] = SDXLDiffDiffDenoiseStep
+
+dd_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_BLOCKS)
+print(dd_blocks)
+```
+
+## ModularPipeline
+
+Convert the [`SequentialPipelineBlocks`] into a [`ModularPipeline`] with the [`ModularPipeline.init_pipeline`] method. This initializes the expected components to load from a `modular_model_index.json` file. Explicitly load the components by calling [`ModularPipeline.load_default_components`].
+
+It is a good idea to initialize the [`ComponentsManager`] with the pipeline to help manage the different components. Once you call [`~ModularPipeline.load_default_components`], the components are registered to the [`ComponentsManager`] and can be shared between workflows. The example below uses the `collection` argument to assign the components a `"diffdiff"` label for better organization.
+
+```py
+from diffusers.modular_pipelines import ComponentsManager
+
+components = ComponentsManager()
+
+dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", components_manager=components, collection="diffdiff")
+dd_pipeline.load_default_components(torch_dtype=torch.float16)
+dd_pipeline.to("cuda")
+```
+
+## Adding workflows
+
+Other workflows can be added to the [`ModularPipeline`] to support additional features without rewriting the entire pipeline from scratch.
+
+This section demonstrates how to add an IP-Adapter or ControlNet.
+
+### IP-Adapter
+
+Stable Diffusion XL already has a preset IP-Adapter block that you can use as-is; it doesn't require any changes to the existing Differential Diffusion pipeline.
+
+```py
+from diffusers.modular_pipelines.stable_diffusion_xl.encoders import StableDiffusionXLAutoIPAdapterStep
+
+ip_adapter_block = StableDiffusionXLAutoIPAdapterStep()
+```
+
+Use the `sub_blocks.insert` method to insert it into the blocks. The example below inserts the `ip_adapter_block` at position `0`. Print the blocks to see that the `ip_adapter_block` is added and that it requires an `ip_adapter_image`. This also adds two components to the pipeline, the `image_encoder` and `feature_extractor`.
+
+```py
+dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0)
+```
+
+Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_default_components`] to load the model components. Load and set the IP-Adapter to run the pipeline.
+
+```py
+dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
+dd_pipeline.load_default_components(torch_dtype=torch.float16)
+dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
+dd_pipeline.loader.set_ip_adapter_scale(0.6)
+dd_pipeline = dd_pipeline.to(device)
+
+ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg")
+image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true")
+mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true")
+
+prompt = "a green pear"
+negative_prompt = "blurry"
+generator = torch.Generator(device=device).manual_seed(42)
+
+image = dd_pipeline(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    num_inference_steps=25,
+    generator=generator,
+    ip_adapter_image=ip_adapter_image,
+    diffdiff_map=mask,
+    image=image,
+    output="images"
+)[0]
+```
+
+### ControlNet
+
+Stable Diffusion XL already has a preset ControlNet block that can be used as-is.
+
+```py
+from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep
+
+control_input_block = StableDiffusionXLAutoControlNetInputStep()
+```
+
+However, it requires modifying the `denoise` block because that's where the ControlNet injects the control information into the UNet.
+
+Modify the `denoise` block by replacing the `StableDiffusionXLLoopDenoiser` sub-block with the `StableDiffusionXLControlNetLoopDenoiser`.
+
+```py
+class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
+    block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLControlNetLoopDenoiser, StableDiffusionXLLoopAfterDenoiser]
+    block_names = ["before_denoiser", "denoiser", "after_denoiser"]
+
+controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep()
+```
+
+Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and load the model components into it with [`~ModularPipeline.load_default_components`].
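+
+Position `7` assumes the block order shown in this guide (with `ip_adapter` previously inserted at position `0`). If you've changed the order, it may help to print the current sub-block names first - a quick check, assuming `sub_blocks` exposes a dict-like interface:
+
+```py
+# confirm where the ControlNet input step should be inserted (illustrative check)
+print(list(dd_blocks.sub_blocks.keys()))
+```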
+
+```py
+dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7)
+dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block
+
+dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
+dd_pipeline.load_default_components(torch_dtype=torch.float16)
+dd_pipeline = dd_pipeline.to(device)
+
+control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg")
+image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true")
+mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true")
+
+prompt = "a green pear"
+negative_prompt = "blurry"
+generator = torch.Generator(device=device).manual_seed(42)
+
+image = dd_pipeline(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    num_inference_steps=25,
+    generator=generator,
+    control_image=control_image,
+    controlnet_conditioning_scale=0.5,
+    diffdiff_map=mask,
+    image=image,
+    output="images"
+)[0]
+```
+
+### AutoPipelineBlocks
+
+The Differential Diffusion, IP-Adapter, and ControlNet workflows can be bundled into a single [`ModularPipeline`] by using [`AutoPipelineBlocks`]. This allows automatically selecting which sub-blocks to run based on inputs such as `control_image` or `ip_adapter_image`. If none of these inputs are passed, it defaults to the Differential Diffusion workflow.
+
+Use `block_trigger_inputs` to only run the `SDXLDiffDiffControlNetDenoiseStep` block when the `controlnet_cond` input (created from `control_image` by the `controlnet_input` block) is present. Otherwise, the `SDXLDiffDiffDenoiseStep` is used.
+
+```py
+class SDXLDiffDiffAutoDenoiseStep(AutoPipelineBlocks):
+    block_classes = [SDXLDiffDiffControlNetDenoiseStep, SDXLDiffDiffDenoiseStep]
+    block_names = ["controlnet_denoise", "denoise"]
+    block_trigger_inputs = ["controlnet_cond", None]
+```
+
+Add the `ip_adapter` and `controlnet_input` blocks.
+
+```py
+DIFFDIFF_AUTO_BLOCKS = IMAGE2IMAGE_BLOCKS.copy()
+DIFFDIFF_AUTO_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep
+DIFFDIFF_AUTO_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"]
+DIFFDIFF_AUTO_BLOCKS["denoise"] = SDXLDiffDiffAutoDenoiseStep
+DIFFDIFF_AUTO_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0)
+DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input", StableDiffusionXLAutoControlNetInputStep, 7)
+```
+
+Call [`SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`], then initialize a [`ModularPipeline`] and load the model components to run it.
+
+```py
+dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS)
+dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
+dd_pipeline.load_default_components(torch_dtype=torch.float16)
+```
+
+## Share
+
+Add your [`ModularPipeline`] to the Hub with [`~ModularPipeline.save_pretrained`] and set the `push_to_hub` argument to `True`.
+
+```py
+dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True)
+```
+
+Other users can load the [`ModularPipeline`] with [`~ModularPipeline.from_pretrained`].
+ +```py +import torch +from diffusers.modular_pipelines import ModularPipeline, ComponentsManager + +components = ComponentsManager() + +diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff") +diffdiff_pipeline.load_default_components(torch_dtype=torch.float16) +``` \ No newline at end of file From 10b84fe79a89a157ceaf775f88b1aa736ef7e3e2 Mon Sep 17 00:00:00 2001 From: stevhliu Date: Wed, 16 Jul 2025 10:38:11 -0700 Subject: [PATCH 03/15] state, pipelineblock, apis --- docs/source/en/_toctree.yml | 25 +- .../api/modular_diffusers/pipeline_blocks.md | 21 ++ .../modular_diffusers/pipeline_components.md | 9 + .../api/modular_diffusers/pipeline_states.md | 9 + .../modular_diffusers_states.md | 57 ++-- .../en/modular_diffusers/pipeline_block.md | 265 +++--------------- 6 files changed, 140 insertions(+), 246 deletions(-) create mode 100644 docs/source/en/api/modular_diffusers/pipeline_blocks.md create mode 100644 docs/source/en/api/modular_diffusers/pipeline_components.md create mode 100644 docs/source/en/api/modular_diffusers/pipeline_states.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 133f867a4561..d39f9bb6fe52 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -116,20 +116,20 @@ title: Quickstart - local: modular_diffusers/end_to_end_guide title: End-to-end example - - local: modular_diffusers/modular_pipeline - title: Modular Pipeline - - local: modular_diffusers/components_manager - title: Components Manager - local: modular_diffusers/modular_diffusers_states - title: Modular Diffusers States + title: Block states - local: modular_diffusers/pipeline_block - title: Pipeline Block + title: PipelineBlock - local: modular_diffusers/sequential_pipeline_blocks - title: Sequential Pipeline Blocks + title: SequentialPipelineBlocks - local: modular_diffusers/loop_sequential_pipeline_blocks - title: Loop Sequential Pipeline Blocks + title: LoopSequentialPipelineBlocks - local: modular_diffusers/auto_pipeline_blocks - title: Auto Pipeline Blocks + title: AutoPipelineBlocks + - local: modular_diffusers/modular_pipeline + title: ModularPipeline + - local: modular_diffusers/components_manager + title: Components Manager title: Modular Diffusers - sections: - local: using-diffusers/consisid @@ -299,6 +299,13 @@ isExpanded: false sections: - title: Main Classes + sections: + - local: api/modular_diffusers/pipeline_blocks + title: Pipeline blocks + - local: api/modular_diffusers/pipeline_states + title: Pipeline states + title: Modular Diffusers + - isExpanded: false sections: - local: api/configuration title: Configuration diff --git a/docs/source/en/api/modular_diffusers/pipeline_blocks.md b/docs/source/en/api/modular_diffusers/pipeline_blocks.md new file mode 100644 index 000000000000..fd454fb68c45 --- /dev/null +++ b/docs/source/en/api/modular_diffusers/pipeline_blocks.md @@ -0,0 +1,21 @@ +# Pipeline blocks + +## ModularPipelineBlocks + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks + +## PipelineBlock + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineBlock + +## SequentialPipelineBlocks + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks + +## LoopSequentialPipelineBlocks + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.LoopSequentialPipelineBlocks + +## AutoPipelineBlocks + +[[autodoc]] 
diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks \ No newline at end of file diff --git a/docs/source/en/api/modular_diffusers/pipeline_components.md b/docs/source/en/api/modular_diffusers/pipeline_components.md new file mode 100644 index 000000000000..ee8540ac5e13 --- /dev/null +++ b/docs/source/en/api/modular_diffusers/pipeline_components.md @@ -0,0 +1,9 @@ +# Components and configs + +## ComponentSpec + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ComponentSpec + +## ConfigSpec + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec \ No newline at end of file diff --git a/docs/source/en/api/modular_diffusers/pipeline_states.md b/docs/source/en/api/modular_diffusers/pipeline_states.md new file mode 100644 index 000000000000..341d18ecb41c --- /dev/null +++ b/docs/source/en/api/modular_diffusers/pipeline_states.md @@ -0,0 +1,9 @@ +# Pipeline states + +## PipelineState + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineState + +## BlockState + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.BlockState \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md index 744089fcf676..1959bb55cd66 100644 --- a/docs/source/en/modular_diffusers/modular_diffusers_states.md +++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md @@ -10,25 +10,25 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> -# PipelineState and BlockState +# Block states - +Blocks rely on the [`PipelineState`] and [`BlockState`] data structures for communicating and sharing data. -🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes. +| State | Description | +|-------|-------------| +| `PipelineState` | Maintains the overall data required for a pipeline's execution and allows blocks to read and update its data. | +| `BlockState` | Allows each block to perform its computation with the necessary data from `inputs` and `intermediate_inputs` | - +This guide explains how states work and how they connect blocks. -In Modular Diffusers, `PipelineState` and `BlockState` are the core data structures that enable blocks to communicate and share data. The concept is fundamental to understand how blocks interact with each other and the pipeline system. +## PipelineState -In the modular diffusers system, `PipelineState` acts as the global state container that all pipeline blocks operate on. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data. +The [`PipelineState`] is a global state container for all pipeline blocks. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data. -A `PipelineState` consists of two distinct states: +There are two dict's in [`PipelineState`] for structuring data. -- **The immutable state** (i.e. the `inputs` dict) contains a copy of values provided by users. Once a value is added to the immutable state, it cannot be changed. Blocks can read from the immutable state but cannot write to it. - -- **The mutable state** (i.e. the `intermediates` dict) contains variables that are passed between blocks and can be modified by them. 
- 
-Here's an example of what a `PipelineState` looks like:
+- The `inputs` dict is an **immutable** state containing a copy of user-provided values. A value added to `inputs` cannot be changed. Blocks can read from `inputs` but cannot write to it.
+- The `intermediates` dict is a **mutable** state containing variables that are passed between blocks and can be modified by them.
 
 ```py
 PipelineState(
@@ -44,9 +44,11 @@ PipelineState(
 )
 ```
 
-Each pipeline blocks define what parts of that state they can read from and write to through their `inputs`, `intermediate_inputs`, and `intermediate_outputs` properties. At run time, they gets a local view (`BlockState`) of the relevant variables it needs from `PipelineState`, performs its operations, and then updates `PipelineState` with any changes.
+## BlockState
+
+The [`BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, that an individual pipeline block needs from [`PipelineState`] for performing its computations.
 
-For example, if a block defines an input `image`, inside the block's `__call__` method, the `BlockState` would contain:
+You can access these variables directly as attributes like `block_state.image`.
 
 ```py
 BlockState(
@@ -54,6 +56,29 @@ BlockState(
 )
 ```
 
-You can access the variables directly as attributes: `block_state.image`.
+When a block's `__call__` method is executed, it retrieves the [`BlockState`] with `self.get_block_state(state)`, performs its operations, and updates [`PipelineState`] with `self.set_block_state(state, block_state)`.
+
+```py
+def __call__(self, components, state):
+    # retrieve BlockState
+    block_state = self.get_block_state(state)
+
+    # computation logic on inputs and intermediate_inputs
+
+    # update PipelineState
+    self.set_block_state(state, block_state)
+    return components, state
+```
+
+## State interaction
+
+[`PipelineState`] and [`BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+
+- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`BlockState`] and won't affect the original image in [`PipelineState`].
+- `intermediate_inputs`, often values created by a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`BlockState`] and [`PipelineState`]. Any subsequent blocks are also affected.
+
+  If a previous block doesn't provide an `intermediate_inputs`, then the pipeline makes it available as a user input. However, the value is still a mutable intermediate state.
+
+- `intermediate_outputs`, a new variable that a block creates from `intermediate_inputs`. It is added to the [`PipelineState`]'s `intermediates` dict and available as an `intermediate_inputs` for subsequent blocks or accessed by users as a final output from the pipeline.
 
-We will explore more on how blocks interact with pipeline state through their `inputs`, `intermediate_inputs`, and `intermediate_outputs` properties, see the [PipelineBlock guide](./pipeline_block.md).
\ No newline at end of file
+
+  If a variable is modified in `block_state` but not declared as an `intermediate_outputs`, it won't be added to [`PipelineState`].
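+
+Here is the interaction in a minimal sketch. The block below is illustrative only - the class, names, and values are made up for this example (it assumes `PipelineBlock`, `InputParam`, and `OutputParam` from `diffusers.modular_pipelines`):
+
+```py
+import torch
+from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam
+
+class SketchBlock(PipelineBlock):
+    model_name = "sketch"
+
+    @property
+    def inputs(self):
+        return [InputParam(name="image")]
+
+    @property
+    def intermediate_inputs(self):
+        return [InputParam(name="batch_size", type_hint=int)]
+
+    @property
+    def intermediate_outputs(self):
+        return [OutputParam(name="image_latents")]
+
+    def __call__(self, components, state):
+        block_state = self.get_block_state(state)
+        block_state.image = None                               # local only: `inputs` stay unchanged in PipelineState
+        block_state.batch_size = block_state.batch_size * 2    # propagates: `intermediate_inputs` update PipelineState
+        block_state.image_latents = torch.randn(1, 4, 64, 64)  # propagates: declared as an intermediate output
+        block_state.scratch = "temporary"                      # dropped: not declared as an intermediate output
+        self.set_block_state(state, block_state)
+        return components, state
+```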
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/pipeline_block.md b/docs/source/en/modular_diffusers/pipeline_block.md
index 17a819732fd0..a007e3519eb1 100644
--- a/docs/source/en/modular_diffusers/pipeline_block.md
+++ b/docs/source/en/modular_diffusers/pipeline_block.md
@@ -12,83 +12,63 @@ specific language governing permissions and limitations under the License.
 
 # PipelineBlock
 
- 
+[`PipelineBlock`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`PipelineBlock`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
 
-🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes.
+A [`PipelineBlock`] on its own can't be run to generate anything. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`PipelineBlock`] needs to be converted into a [`ModularPipeline`].
 
- 
+This guide will show you how to create a [`PipelineBlock`].
 
-In Modular Diffusers, you build your workflow using `ModularPipelineBlocks`. We support 4 different types of blocks: `PipelineBlock`, `SequentialPipelineBlocks`, `LoopSequentialPipelineBlocks`, and `AutoPipelineBlocks`. Among them, `PipelineBlock` is the most fundamental building block of the whole system - it's like a brick in a Lego system. These blocks are designed to easily connect with each other, allowing for modular construction of creative and potentially very complex workflows.
+## Inputs and outputs
 
- 
+> [!TIP]
+> Refer to the [Block states](./modular_diffusers_states) guide if you aren't familiar with how state works in Modular Diffusers.
 
-**Important**: `PipelineBlock`s are definitions/specifications, not runnable pipelines. They define what a block should do and what data it needs, but you need to convert them into a `ModularPipeline` to actually execute them. For information on creating and running pipelines, see the [Modular Pipeline guide](./modular_pipeline.md).
+A [`PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
 
- 
+- `inputs` are values provided by a user and they are retrieved from the [`PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`PipelineState`] maintains the original image.
 
-In this tutorial, we will focus on how to write a basic `PipelineBlock` and how it interacts with the pipeline state.
+  Use `InputParam` to define `inputs`.
 
-## PipelineState
+  ```py
+  from diffusers.modular_pipelines import InputParam
 
-Before we dive into creating `PipelineBlock`s, make sure you have a basic understanding of `PipelineState`. It acts as the global state container that all blocks operate on - each block gets a local view (`BlockState`) of the relevant variables it needs from `PipelineState`, performs its operations, and then updates `PipelineState` with any changes. See the [PipelineState and BlockState guide](./modular_diffusers_states.md) for more details.
+  user_inputs = [
+      InputParam(name="image", type_hint="PIL.Image", description="raw input image to process")
+  ]
+  ```
 
-## Define a `PipelineBlock`
+- `intermediate_inputs` are values typically created by a previous block, but they can also be provided directly if no preceding block generates them.
Unlike `inputs`, `intermediate_inputs` can be modified. -To write a `PipelineBlock` class, you need to define a few properties that determine how your block interacts with the pipeline state. Understanding these properties is crucial - they define what data your block can access and what it can produce. + Use `InputParam` to define `intermediate_inputs`. -The three main properties you need to define are: -- `inputs`: Immutable values from the user that cannot be modified -- `intermediate_inputs`: Mutable values from previous blocks that can be read and modified -- `intermediate_outputs`: New values your block creates for subsequent blocks and user access + ```py + user_intermediate_inputs = [ + InputParam(name="processed_image", type_hint="torch.Tensor", description="image that has been preprocessed and normalized"), + ] + ``` -Let's explore each one and understand how they work with the pipeline state. +- `intermediate_outputs` are new values created by a block and added to the [`PipelineState`]. The `intermediate_outputs` are available as `intermediate_inputs` for subsequent blocks or available as the final output from running the pipeline. -**Inputs: Immutable User Values** + Use `OutputParam` to define `intermediate_outputs`. -Inputs are variables your block needs from the immutable pipeline state - these are user-provided values that cannot be modified by any block. You define them using `InputParam`: + ```py + from diffusers.modular_pipelines import OutputParam -```py -user_inputs = [ - InputParam(name="image", type_hint="PIL.Image", description="raw input image to process") -] -``` - -When you list something as an input, you're saying "I need this value directly from the end user, and I will talk to them directly, telling them what I need in the 'description' field. They will provide it and it will come to me unchanged." - -This is especially useful for raw values that serve as the "source of truth" in your workflow. For example, with a raw image, many workflows require preprocessing steps like resizing that a previous block might have performed. But in many cases, you also want the raw PIL image. In some inpainting workflows, you need the original image to overlay with the generated result for better control and consistency. - -**Intermediate Inputs: Mutable Values from Previous Blocks, or Users** - -Intermediate inputs are variables your block needs from the mutable pipeline state - these are values that can be read and modified. They're typically created by previous blocks, but could also be directly provided by the user if not the case: - -```py -user_intermediate_inputs = [ - InputParam(name="processed_image", type_hint="torch.Tensor", description="image that has been preprocessed and normalized"), -] -``` - -When you list something as an intermediate input, you're saying "I need this value, but I want to work with a different block that has already created it. I already know for sure that I can get it from this other block, but it's okay if other developers want use something different." - -**Intermediate Outputs: New Values for Subsequent Blocks and User Access** - -Intermediate outputs are new variables your block creates and adds to the mutable pipeline state. They serve two purposes: - -1. **For subsequent blocks**: They can be used as intermediate inputs by other blocks in the pipeline -2. 
**For users**: They become available as final outputs that users can access when running the pipeline + user_intermediate_outputs = [ + OutputParam(name="image_latents", description="latents representing the image") + ] + ``` -```py -user_intermediate_outputs = [ - OutputParam(name="image_latents", description="latents representing the image") -] -``` - -Intermediate inputs and intermediate outputs work together like Lego studs and anti-studs - they're the connection points that make blocks modular. When one block produces an intermediate output, it becomes available as an intermediate input for subsequent blocks. This is where the "modular" nature of the system really shines - blocks can be connected and reconnected in different ways as long as their inputs and outputs match. +The intermediate inputs and outputs work together to connect blocks by sharing data between them. They are accessible at any point, allowing you to track the workflow's progress. -Additionally, all intermediate outputs are accessible to users when they run the pipeline, typically you would only need the final images, but they are also able to access intermediate results like latents, embeddings, or other processing steps. +## Computation logic -**The `__call__` Method Structure** +The computation a block performs is defined in the `__call__` method which follows a specific structure. -Your `PipelineBlock`'s `__call__` method should follow this structure: +1. Retrieve the [`BlockState`] to get a local view of the `inputs` and `intermediate_inputs` it needs from [`PipelineState`]. +2. Implement the computation logic on the `inputs` and `intermediate_inputs`. +3. Update [`PipelineState`] to push changes from the local [`BlockState`] back to the global [`PipelineState`]. +4. Return the components and state which becomes available to the next block. ```py def __call__(self, components, state): @@ -97,39 +77,34 @@ def __call__(self, components, state): # Your computation logic here # block_state contains all your inputs and intermediate_inputs - # You can access them like: block_state.image, block_state.processed_image + # Access them like: block_state.image, block_state.processed_image # Update the pipeline state with your updated block_states self.set_block_state(state, block_state) return components, state ``` -The `block_state` object contains all the variables you defined in `inputs` and `intermediate_inputs`, making them easily accessible for your computation. +### Components and Configs -**Components and Configs** +The components and pipeline-level configs a block needs are specified in [`ComponentSpec`] and [`ConfigSpec`]. -You can define the components and pipeline-level configs your block needs using `ComponentSpec` and `ConfigSpec`: +- [`ComponentSpec`] contains the expected components used by a block. You need the `name` of the component and ideally a `type_hint` that specifies exactly what the component is. +- [`ConfigSpec`] contains pipeline-level settings that control behavior across all blocks. ```py from diffusers import ComponentSpec, ConfigSpec -# Define components your block needs expected_components = [ ComponentSpec(name="unet", type_hint=UNet2DConditionModel), ComponentSpec(name="scheduler", type_hint=EulerDiscreteScheduler) ] -# Define pipeline-level configs expected_config = [ ConfigSpec("force_zeros_for_empty_prompt", True) ] ``` -**Components**: In the `ComponentSpec`, you must provide a `name` and ideally a `type_hint`. 
You can also specify a `default_creation_method` to indicate whether the component should be loaded from a pretrained model or created with default configurations. The actual loading details (`repo`, `subfolder`, `variant` and `revision` fields) are typically specified when creating the pipeline, as we covered in the [Modular Pipeline Guide](./modular_pipeline.md). - -**Configs**: Pipeline-level settings that control behavior across all blocks. - -When you convert your blocks into a pipeline using `blocks.init_pipeline()`, the pipeline collects all component requirements from the blocks and fetches the loading specs from the modular repository. The components are then made available to your block as the first argument of the `__call__` method. You can access any component you need using dot notation: +When the blocks are converted into a pipeline, the components become available to the block as the first argument in `__call__`. ```py def __call__(self, components, state): @@ -137,156 +112,4 @@ def __call__(self, components, state): unet = components.unet vae = components.vae scheduler = components.scheduler -``` - -That's all you need to define in order to create a `PipelineBlock`. There is no hidden complexity. In fact we are going to create a helper function that take exactly these variables as input and return a pipeline block. We will use this helper function through out the tutorial to create test blocks - -Note that for `__call__` method, the only part you should implement differently is the part between `self.get_block_state()` and `self.set_block_state()`, which can be abstracted into a simple function that takes `block_state` and returns the updated state. Our helper function accepts a `block_fn` that does exactly that. - -**Helper Function** - -```py -from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam -import torch - -def make_block(inputs=[], intermediate_inputs=[], intermediate_outputs=[], block_fn=None, description=None): - class TestBlock(PipelineBlock): - model_name = "test" - - @property - def inputs(self): - return inputs - - @property - def intermediate_inputs(self): - return intermediate_inputs - - @property - def intermediate_outputs(self): - return intermediate_outputs - - @property - def description(self): - return description if description is not None else "" - - def __call__(self, components, state): - block_state = self.get_block_state(state) - if block_fn is not None: - block_state = block_fn(block_state, state) - self.set_block_state(state, block_state) - return components, state - - return TestBlock -``` - -## Example: Creating a Simple Pipeline Block - -Let's create a simple block to see how these definitions interact with the pipeline state. 
To better understand what's happening, we'll print out the states before and after updates to inspect them: - -```py -inputs = [ - InputParam(name="image", type_hint="PIL.Image", description="raw input image to process") -] - -intermediate_inputs = [InputParam(name="batch_size", type_hint=int)] - -intermediate_outputs = [ - OutputParam(name="image_latents", description="latents representing the image") -] - -def image_encoder_block_fn(block_state, pipeline_state): - print(f"pipeline_state (before update): {pipeline_state}") - print(f"block_state (before update): {block_state}") - - # Simulate processing the image - block_state.image = torch.randn(1, 3, 512, 512) - block_state.batch_size = block_state.batch_size * 2 - block_state.processed_image = [torch.randn(1, 3, 512, 512)] * block_state.batch_size - block_state.image_latents = torch.randn(1, 4, 64, 64) - - print(f"block_state (after update): {block_state}") - return block_state - -# Create a block with our definitions -image_encoder_block_cls = make_block( - inputs=inputs, - intermediate_inputs=intermediate_inputs, - intermediate_outputs=intermediate_outputs, - block_fn=image_encoder_block_fn, - description="Encode raw image into its latent presentation" -) -image_encoder_block = image_encoder_block_cls() -pipe = image_encoder_block.init_pipeline() -``` - -Let's check the pipeline's docstring to see what inputs it expects: -```py ->>> print(pipe.doc) -class TestBlock - - Encode raw image into its latent presentation - - Inputs: - - image (`PIL.Image`, *optional*): - raw input image to process - - batch_size (`int`, *optional*): - - Outputs: - - image_latents (`None`): - latents representing the image -``` - -Notice that `batch_size` appears as an input even though we defined it as an intermediate input. This happens because no previous block provided it, so the pipeline makes it available as a user input. However, unlike regular inputs, this value goes directly into the mutable intermediate state. - -Now let's run the pipeline: - -```py -from diffusers.utils import load_image - -image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/image_of_squirrel_painting.png") -state = pipe(image=image, batch_size=2) -print(f"pipeline_state (after update): {state}") -``` -```out -pipeline_state (before update): PipelineState( - inputs={ - image: - }, - intermediates={ - batch_size: 2 - }, -) -block_state (before update): BlockState( - image: - batch_size: 2 -) - -block_state (after update): BlockState( - image: Tensor(dtype=torch.float32, shape=torch.Size([1, 3, 512, 512])) - batch_size: 4 - processed_image: List[4] of Tensors with shapes [torch.Size([1, 3, 512, 512]), torch.Size([1, 3, 512, 512]), torch.Size([1, 3, 512, 512]), torch.Size([1, 3, 512, 512])] - image_latents: Tensor(dtype=torch.float32, shape=torch.Size([1, 4, 64, 64])) -) -pipeline_state (after update): PipelineState( - inputs={ - image: - }, - intermediates={ - batch_size: 4 - image_latents: Tensor(dtype=torch.float32, shape=torch.Size([1, 4, 64, 64])) - }, -) -``` - -**Key Observations:** - -1. **Before the update**: `image` (the input) goes to the immutable inputs dict, while `batch_size` (the intermediate_input) goes to the mutable intermediates dict, and both are available in `block_state`. - -2. **After the update**: - - **`image` (inputs)** changed in `block_state` but not in `pipeline_state` - this change is local to the block only. 
- - **`batch_size (intermediate_inputs)`** was updated in both `block_state` and `pipeline_state` - this change affects subsequent blocks (we didn't need to declare it as an intermediate output since it was already in the intermediates dict) - - **`image_latents (intermediate_outputs)`** was added to `pipeline_state` because it was declared as an intermediate output - - **`processed_image`** was not added to `pipeline_state` because it wasn't declared as an intermediate output \ No newline at end of file +``` \ No newline at end of file From b49a3383dbcce51b4b5784eed002adfac1c04501 Mon Sep 17 00:00:00 2001 From: stevhliu Date: Wed, 16 Jul 2025 15:55:40 -0700 Subject: [PATCH 04/15] sequential --- docs/source/en/_toctree.yml | 2 + .../sequential_pipeline_blocks.md | 226 ++++++------------ 2 files changed, 81 insertions(+), 147 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index d39f9bb6fe52..966059ab0cca 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -304,6 +304,8 @@ title: Pipeline blocks - local: api/modular_diffusers/pipeline_states title: Pipeline states + - local: api/modular_diffusers/pipeline_components + title: Components and configs title: Modular Diffusers - isExpanded: false sections: diff --git a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md index a683f0d0659a..518e658506ff 100644 --- a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md +++ b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md @@ -12,178 +12,110 @@ specific language governing permissions and limitations under the License. # SequentialPipelineBlocks - +[`SequentialPipelineBlocks`] are a multi-block type that composes other [`PipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline. -🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes. +This guide shows you how to connect two blocks into a [`SequentialPipelineBlocks`]. - +Create two [`PipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`. -`SequentialPipelineBlocks` is a subclass of `ModularPipelineBlocks`. Unlike `PipelineBlock`, it is a multi-block that composes other blocks together in sequence, creating modular workflows where data flows from one block to the next. It's one of the most common ways to build complex pipelines by combining simpler building blocks. - - - -Other types of multi-blocks include [AutoPipelineBlocks](auto_pipeline_blocks.md) (for conditional block selection) and [LoopSequentialPipelineBlocks](loop_sequential_pipeline_blocks.md) (for iterative workflows). For information on creating individual blocks, see the [PipelineBlock guide](pipeline_block.md). - -Additionally, like all `ModularPipelineBlocks`, `SequentialPipelineBlocks` are definitions/specifications, not runnable pipelines. You need to convert them into a `ModularPipeline` to actually execute them. For information on creating and running pipelines, see the [Modular Pipeline guide](modular_pipeline.md). 
- - - -In this tutorial, we will focus on how to create `SequentialPipelineBlocks` and how blocks connect and work together. - -The key insight is that blocks connect through their intermediate inputs and outputs - the "studs and anti-studs" we discussed in the [PipelineBlock guide](pipeline_block.md). When one block produces an intermediate output, it becomes available as an intermediate input for subsequent blocks. - -Let's explore this through an example. We will use the same helper function from the PipelineBlock guide to create blocks. + + ```py from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam -import torch -def make_block(inputs=[], intermediate_inputs=[], intermediate_outputs=[], block_fn=None, description=None): - class TestBlock(PipelineBlock): - model_name = "test" - - @property - def inputs(self): - return inputs - - @property - def intermediate_inputs(self): - return intermediate_inputs - - @property - def intermediate_outputs(self): - return intermediate_outputs - - @property - def description(self): - return description if description is not None else "" - - def __call__(self, components, state): - block_state = self.get_block_state(state) - if block_fn is not None: - block_state = block_fn(block_state, state) - self.set_block_state(state, block_state) - return components, state - - return TestBlock +class InputBlock(PipelineBlock): + + @property + def inputs(self): + return [ + InputParam(name="prompt", type_hint=list, description="list of text prompts"), + InputParam(name="num_images_per_prompt", type_hint=int, description="number of images per prompt"), + ] + + @property + def intermediate_inputs(self): + return [] + + @property + def intermediate_outputs(self): + return [ + OutputParam(name="batch_size", description="calculated batch size"), + ] + + @property + def description(self): + return "A block that determines batch_size based on the number of prompts and num_images_per_prompt argument." + + def __call__(self, components, state): + block_state = self.get_block_state(state) + batch_size = len(block_state.prompt) + block_state.batch_size = batch_size * block_state.num_images_per_prompt + self.set_block_state(state, block_state) + return components, state ``` -Let's create a block that produces `batch_size`, which we'll call "input_block": + + ```py -def input_block_fn(block_state, pipeline_state): - - batch_size = len(block_state.prompt) - block_state.batch_size = batch_size * block_state.num_images_per_prompt - - return block_state - -input_block_cls = make_block( - inputs=[ - InputParam(name="prompt", type_hint=list, description="list of text prompts"), - InputParam(name="num_images_per_prompt", type_hint=int, description="number of images per prompt") - ], - intermediate_outputs=[ - OutputParam(name="batch_size", description="calculated batch size") - ], - block_fn=input_block_fn, - description="A block that determines batch_size based on the number of prompts and num_images_per_prompt argument." 
-) -input_block = input_block_cls() +import torch +from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam + +class ImageEncoderBlock(PipelineBlock): + + @property + def inputs(self): + return [ + InputParam(name="image", type_hint="PIL.Image", description="raw input image to process"), + ] + + @property + def intermediate_inputs(self): + return [ + InputParam(name="batch_size", type_hint=int), + ] + + @property + def intermediate_outputs(self): + return [ + OutputParam(name="image_latents", description="latents representing the image"), + ] + + @property + def description(self): + return "Encode raw image into its latent presentation" + + def __call__(self, components, state): + block_state = self.get_block_state(state) + # Simulate processing the image + block_state.image = torch.randn(1, 3, 512, 512) + block_state.batch_size = block_state.batch_size * 2 + block_state.image_latents = torch.randn(1, 4, 64, 64) + self.set_block_state(state, block_state) + return components, state ``` -Now let's create a second block that uses the `batch_size` from the first block: + + -```py -def image_encoder_block_fn(block_state, pipeline_state): - # Simulate processing the image - block_state.image = torch.randn(1, 3, 512, 512) - block_state.batch_size = block_state.batch_size * 2 - block_state.image_latents = torch.randn(1, 4, 64, 64) - return block_state - -image_encoder_block_cls = make_block( - inputs=[ - InputParam(name="image", type_hint="PIL.Image", description="raw input image to process") - ], - intermediate_inputs=[ - InputParam(name="batch_size", type_hint=int) - ], - intermediate_outputs=[ - OutputParam(name="image_latents", description="latents representing the image") - ], - block_fn=image_encoder_block_fn, - description="Encode raw image into its latent presentation" -) -image_encoder_block = image_encoder_block_cls() -``` +Connect the two blocks by defining an [`InsertableDict`] to map the block names to the block instances. Blocks are executed in the order they're registered in `blocks_dict`. -Now let's connect these blocks to create a `SequentialPipelineBlocks`: +Use [`~SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`]. ```py from diffusers.modular_pipelines import SequentialPipelineBlocks, InsertableDict -# Define a dict mapping block names to block instances blocks_dict = InsertableDict() blocks_dict["input"] = input_block blocks_dict["image_encoder"] = image_encoder_block -# Create the SequentialPipelineBlocks blocks = SequentialPipelineBlocks.from_blocks_dict(blocks_dict) ``` -Now you have a `SequentialPipelineBlocks` with 2 blocks: +Inspect the sub-blocks in [`SequentialPipelineBlocks`] by calling `blocks`, and for more details about the inputs and outputs, access the `docs` attribute. ```py ->>> blocks -SequentialPipelineBlocks( - Class: ModularPipelineBlocks - - Description: - - - Sub-Blocks: - [0] input (TestBlock) - Description: A block that determines batch_size based on the number of prompts and num_images_per_prompt argument. - - [1] image_encoder (TestBlock) - Description: Encode raw image into its latent presentation - -) -``` - -When you inspect `blocks.doc`, you can see that `batch_size` is not listed as an input. The pipeline automatically detects that the `input_block` can produce `batch_size` for the `image_encoder_block`, so it doesn't ask the user to provide it. 
- -```py ->>> print(blocks.doc) -class SequentialPipelineBlocks - - Inputs: - - prompt (`None`, *optional*): - - num_images_per_prompt (`None`, *optional*): - - image (`PIL.Image`, *optional*): - raw input image to process - - Outputs: - - batch_size (`None`): - - image_latents (`None`): - latents representing the image -``` - -At runtime, you have data flow like this: - -![Data Flow Diagram](https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/modular_quicktour/Editor%20_%20Mermaid%20Chart-2025-06-30-092631.png) - -**How SequentialPipelineBlocks Works:** - -1. Blocks are executed in the order they're registered in the `blocks_dict` -2. Outputs from one block become available as intermediate inputs to all subsequent blocks -3. The pipeline automatically figures out which values need to be provided by the user and which will be generated by previous blocks -4. Each block maintains its own behavior and operates through its defined interface, while collectively these interfaces determine what the entire pipeline accepts and produces - -What happens within each block follows the same pattern we described earlier: each block gets its own `block_state` with the relevant inputs and intermediate inputs, performs its computation, and updates the pipeline state with its intermediate outputs. \ No newline at end of file +print(blocks) +print(blocks.doc) +``` \ No newline at end of file From cc71c0b97c658ec147e4fc0646bcc6cf5e904550 Mon Sep 17 00:00:00 2001 From: stevhliu Date: Wed, 16 Jul 2025 16:09:41 -0700 Subject: [PATCH 05/15] fix links --- .../modular_diffusers_states.md | 24 +++++++++---------- .../en/modular_diffusers/pipeline_block.md | 20 ++++++++-------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md index 1959bb55cd66..bb9d067d02d4 100644 --- a/docs/source/en/modular_diffusers/modular_diffusers_states.md +++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md @@ -12,20 +12,20 @@ specific language governing permissions and limitations under the License. # Block states -Blocks rely on the [`PipelineState`] and [`BlockState`] data structures for communicating and sharing data. +Blocks rely on the [`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] data structures for communicating and sharing data. | State | Description | |-------|-------------| -| `PipelineState` | Maintains the overall data required for a pipeline's execution and allows blocks to read and update its data. | -| `BlockState` | Allows each block to perform its computation with the necessary data from `inputs` and `intermediate_inputs` | +| [`~modular_pipelines.PipelineState`] | Maintains the overall data required for a pipeline's execution and allows blocks to read and update its data. | +| [`~modular_pipelines.BlockState`] | Allows each block to perform its computation with the necessary data from `inputs` and `intermediate_inputs` | This guide explains how states work and how they connect blocks. ## PipelineState -The [`PipelineState`] is a global state container for all pipeline blocks. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data. +The [`~modular_pipelines.PipelineState`] is a global state container for all pipeline blocks. 
It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data.
 
-There are two dict's in [`PipelineState`] for structuring data.
+There are two dicts in [`~modular_pipelines.PipelineState`] for structuring data.
 
 - The `inputs` dict is an **immutable** state containing a copy of user-provided values. A value added to `inputs` cannot be changed. Blocks can read from `inputs` but cannot write to it.
 - The `intermediates` dict is a **mutable** state containing variables that are passed between blocks and can be modified by them.
 
 ```py
 PipelineState(
@@ -46,7 +46,7 @@ PipelineState(
 )
 ```
 
 ## BlockState
 
-The [`BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, that an individual pipeline block needs from [`PipelineState`] for performing its computations.
+The [`~modular_pipelines.BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, that an individual pipeline block needs from [`~modular_pipelines.PipelineState`] for performing its computations.
 
 You can access these variables directly as attributes like `block_state.image`.
 
@@ -56,7 +56,7 @@ BlockState(
 )
 ```
 
-When a block's `__call__` method is executed, it retrieves the [`BlockState`] with `self.get_block_state(state)`, performs its operations, and updates [`PipelineState`] with `self.set_block_state(state, block_state)`.
+When a block's `__call__` method is executed, it retrieves the [`BlockState`] with `self.get_block_state(state)`, performs its operations, and updates [`~modular_pipelines.PipelineState`] with `self.set_block_state(state, block_state)`.
 
 ```py
 def __call__(self, components, state):
@@ -72,13 +72,13 @@ def __call__(self, components, state):
 
 ## State interaction
 
-[`PipelineState`] and [`BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+[`~modular_pipelines.PipelineState`] and [`BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
 
-- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`BlockState`] and won't affect the original image in [`PipelineState`].
-- `intermediate_inputs`, often values created by a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`BlockState`] and [`PipelineState`]. Any subsequent blocks are also affected.
+- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`~modular_pipelines.BlockState`] and won't affect the original image in [`~modular_pipelines.PipelineState`].
+- `intermediate_inputs`, often values created by a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`~modular_pipelines.BlockState`] and [`~modular_pipelines.PipelineState`]. Any subsequent blocks are also affected.
 
   If a previous block doesn't provide an `intermediate_inputs`, then the pipeline makes it available as a user input. However, the value is still a mutable intermediate state.
 
-- `intermediate_outputs`, a new variable that a block creates from `intermediate_inputs`.
It is added to the [`~modular_pipelines.PipelineState`]'s `intermediates` dict and available as an `intermediate_inputs` for subsequent blocks or accessed by users as a final output from the pipeline.
 
   If a variable is modified in `block_state` but not declared as an `intermediate_outputs`, it won't be added to [`~modular_pipelines.PipelineState`].
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/pipeline_block.md b/docs/source/en/modular_diffusers/pipeline_block.md
index a007e3519eb1..fd553cee9454 100644
--- a/docs/source/en/modular_diffusers/pipeline_block.md
+++ b/docs/source/en/modular_diffusers/pipeline_block.md
@@ -12,20 +12,20 @@ specific language governing permissions and limitations under the License.
 # PipelineBlock
 
-[`PipelineBlock`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`PipelineBlock`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
+[`~modular_pipelines.PipelineBlock`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`~modular_pipelines.PipelineBlock`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
 
-A [`PipelineBlock`] on its own can't be run to generate anything. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`PipelineBlock`] needs to be converted into a [`ModularPipeline`].
+A [`~modular_pipelines.PipelineBlock`] on its own can't be run to generate anything. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`~modular_pipelines.PipelineBlock`] needs to be converted into a [`ModularPipeline`].
 
-This guide will show you how to create a [`PipelineBlock`].
+This guide will show you how to create a [`~modular_pipelines.PipelineBlock`].
 
 ## Inputs and outputs
 
 > [!TIP]
 > Refer to the [Block states](./modular_diffusers_states) guide if you aren't familiar with how state works in Modular Diffusers.
 
-A [`PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+A [`~modular_pipelines.PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
 
-- `inputs` are values provided by a user and they are retrieved from the [`PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`PipelineState`] maintains the original image.
+- `inputs` are values provided by a user and they are retrieved from the [`~modular_pipelines.PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`~modular_pipelines.PipelineState`] maintains the original image.
 
@@ -47,7 +47,7 @@ A [`PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_
 ]
 ```
 
-- `intermediate_outputs` are new values created by a block and added to the [`PipelineState`]. The `intermediate_outputs` are available as `intermediate_inputs` for subsequent blocks or available as the final output from running the pipeline.
+- `intermediate_outputs` are new values created by a block and added to the [`~modular_pipelines.PipelineState`].
The `intermediate_outputs` are available as `intermediate_inputs` for subsequent blocks or available as the final output from running the pipeline. +- `intermediate_outputs` are new values created by a block and added to the [`~modular_pipelines.PipelineState`]. The `intermediate_outputs` are available as `intermediate_inputs` for subsequent blocks or available as the final output from running the pipeline. Use `OutputParam` to define `intermediate_outputs`. @@ -65,9 +65,9 @@ The intermediate inputs and outputs work together to connect blocks by sharing d The computation a block performs is defined in the `__call__` method which follows a specific structure. -1. Retrieve the [`BlockState`] to get a local view of the `inputs` and `intermediate_inputs` it needs from [`PipelineState`]. +1. Retrieve the [`~modular_pipelines.BlockState`] to get a local view of the `inputs` and `intermediate_inputs` it needs from [`~modular_pipelines.PipelineState`]. 2. Implement the computation logic on the `inputs` and `intermediate_inputs`. -3. Update [`PipelineState`] to push changes from the local [`BlockState`] back to the global [`PipelineState`]. +3. Update [`~modular_pipelines.PipelineState`] to push changes from the local [`~modular_pipelines.BlockState`] back to the global [`~modular_pipelines.PipelineState`]. 4. Return the components and state which becomes available to the next block. ```py @@ -86,10 +86,10 @@ def __call__(self, components, state): ### Components and Configs -The components and pipeline-level configs a block needs are specified in [`ComponentSpec`] and [`ConfigSpec`]. +The components and pipeline-level configs a block needs are specified in [`ComponentSpec`] and [`~modular_pipelines.ConfigSpec`]. - [`ComponentSpec`] contains the expected components used by a block. You need the `name` of the component and ideally a `type_hint` that specifies exactly what the component is. -- [`ConfigSpec`] contains pipeline-level settings that control behavior across all blocks. +- [`~modular_pipelines.ConfigSpec`] contains pipeline-level settings that control behavior across all blocks. 
```py
from diffusers import ComponentSpec, ConfigSpec

From 274289faeb7c575a99ed7deb4fd7ea4df42850ae Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Thu, 17 Jul 2025 08:16:05 -0700
Subject: [PATCH 06/15] new

---
 docs/source/en/_toctree.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 966059ab0cca..5fec56906330 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -112,6 +112,7 @@
   sections:
   - local: modular_diffusers/overview
     title: Overview
+    new: true
  - local: modular_diffusers/quickstart
    title: Quickstart
  - local: modular_diffusers/end_to_end_guide

From a5aa75c8a422b9739a42894c3ce7da6f8b764d20 Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Thu, 17 Jul 2025 16:15:05 -0700
Subject: [PATCH 07/15] loop, auto

---
 docs/source/en/_toctree.yml                   |   1 -
 .../modular_diffusers/auto_pipeline_blocks.md | 361 ++++++------------
 .../loop_sequential_pipeline_blocks.md        | 143 +------
 .../sequential_pipeline_blocks.md             |  10 +-
 4 files changed, 133 insertions(+), 382 deletions(-)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 5fec56906330..966059ab0cca 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -112,7 +112,6 @@
   sections:
   - local: modular_diffusers/overview
     title: Overview
-    new: true
   - local: modular_diffusers/quickstart
     title: Quickstart
   - local: modular_diffusers/end_to_end_guide
diff --git a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
index 50c3250512d1..b17766ecf5e1 100644
--- a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
@@ -12,83 +12,124 @@ specific language governing permissions and limitations under the License.
 
 # AutoPipelineBlocks
 
-
+[`~modular_pipelines.AutoPipelineBlocks`] is a multi-block type containing blocks that support different workflows. It automatically selects which sub-blocks to run based on the input provided at runtime. This is typically used to package multiple workflows - text-to-image, image-to-image, inpaint - into a single pipeline for convenience.
 
-🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes.
+This guide shows you how to create [`~modular_pipelines.AutoPipelineBlocks`].
 
-
+Create three [`~modular_pipelines.PipelineBlock`]s for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline.
 
-`AutoPipelineBlocks` is a subclass of `ModularPipelineBlocks`. It is a multi-block that automatically selects which sub-blocks to run based on the inputs provided at runtime, creating conditional workflows that adapt to different scenarios. The main purpose is convenience and portability - for developers, you can package everything into one workflow, making it easier to share and use.
+
+
 
-In this tutorial, we will show you how to create an `AutoPipelineBlocks` and learn more about how the conditional selection works.
- 
- 
+```py
+import torch
+from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam
 
-Other types of multi-blocks include [SequentialPipelineBlocks](sequential_pipeline_blocks.md) (for linear workflows) and [LoopSequentialPipelineBlocks](loop_sequential_pipeline_blocks.md) (for iterative workflows). For information on creating individual blocks, see the [PipelineBlock guide](pipeline_block.md). 
+class TextToImageBlock(PipelineBlock): + model_name = "text2img" + + @property + def inputs(self): + return [InputParam(name="prompt")] + + @property + def intermediate_inputs(self): + return [] + + @property + def intermediate_outputs(self): + return [] + + @property + def description(self): + return "I'm a text-to-image workflow!" + + def __call__(self, components, state): + block_state = self.get_block_state(state) + print("running the text-to-image workflow") + # Add your text-to-image logic here + # For example: generate image from prompt + self.set_block_state(state, block_state) + return components, state +``` -Additionally, like all `ModularPipelineBlocks`, `AutoPipelineBlocks` are definitions/specifications, not runnable pipelines. You need to convert them into a `ModularPipeline` to actually execute them. For information on creating and running pipelines, see the [Modular Pipeline guide](modular_pipeline.md). - + + -For example, you might want to support text-to-image and image-to-image tasks. Instead of creating two separate pipelines, you can create an `AutoPipelineBlocks` that automatically chooses the workflow based on whether an `image` input is provided. +```py +class ImageToImageBlock(PipelineBlock): + model_name = "img2img" + + @property + def inputs(self): + return [InputParam(name="prompt"), InputParam(name="image")] + + @property + def intermediate_inputs(self): + return [] + + @property + def intermediate_outputs(self): + return [] + + @property + def description(self): + return "I'm an image-to-image workflow!" + + def __call__(self, components, state): + block_state = self.get_block_state(state) + print("running the image-to-image workflow") + # Add your image-to-image logic here + # For example: transform input image based on prompt + self.set_block_state(state, block_state) + return components, state +``` -Let's see an example. We'll use the helper function from the [PipelineBlock guide](./pipeline_block.md) to create our blocks: -**Helper Function** + + ```py -from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam -import torch - -def make_block(inputs=[], intermediate_inputs=[], intermediate_outputs=[], block_fn=None, description=None): - class TestBlock(PipelineBlock): - model_name = "test" - - @property - def inputs(self): - return inputs - - @property - def intermediate_inputs(self): - return intermediate_inputs - - @property - def intermediate_outputs(self): - return intermediate_outputs - - @property - def description(self): - return description if description is not None else "" - - def __call__(self, components, state): - block_state = self.get_block_state(state) - if block_fn is not None: - block_state = block_fn(block_state, state) - self.set_block_state(state, block_state) - return components, state +class InpaintBlock(PipelineBlock): + model_name = "inpaint" + + @property + def inputs(self): + return [InputParam(name="prompt"), InputParam(name="image"), InputParam(name="mask")] + + @property + def intermediate_inputs(self): + return [] + + @property + def intermediate_outputs(self): + return [] - return TestBlock + @property + def description(self): + return "I'm an inpaint workflow!" 
+
+    def __call__(self, components, state):
+        block_state = self.get_block_state(state)
+        print("running the inpaint workflow")
+        # Add your inpainting logic here
+        # For example: fill masked areas based on prompt
+        self.set_block_state(state, block_state)
+        return components, state
 ```
 
 
 
-Now let's create a dummy `AutoPipelineBlocks` that includes dummy text-to-image, image-to-image, and inpaint pipelines.
+Create an [`~modular_pipelines.AutoPipelineBlocks`] class that includes a list of the sub-block classes and their corresponding block names.
 
-```py
-from diffusers.modular_pipelines import AutoPipelineBlocks
+You also need to include `block_trigger_inputs`, a list of input names that trigger the corresponding block. If a trigger input is provided at runtime, then that block is selected to run. Use `None` to specify the default block to run if no trigger inputs are detected.
 
-# These are dummy blocks and we only focus on "inputs" for our purpose
-inputs = [InputParam(name="prompt")]
-# block_fn prints out which workflow is running so we can see the execution order at runtime
-block_fn = lambda x, y: print("running the text-to-image workflow")
-block_t2i_cls = make_block(inputs=inputs, block_fn=block_fn, description="I'm a text-to-image workflow!")
+Lastly, it is important to include a `description` that clearly explains which inputs trigger which workflow. This helps users understand how to run specific workflows.
 
-inputs = [InputParam(name="prompt"), InputParam(name="image")]
-block_fn = lambda x, y: print("running the image-to-image workflow")
-block_i2i_cls = make_block(inputs=inputs, block_fn=block_fn, description="I'm a image-to-image workflow!")
-
-inputs = [InputParam(name="prompt"), InputParam(name="image"), InputParam(name="mask")]
-block_fn = lambda x, y: print("running the inpaint workflow")
-block_inpaint_cls = make_block(inputs=inputs, block_fn=block_fn, description="I'm a inpaint workflow!")
+```py
+from diffusers.modular_pipelines import AutoPipelineBlocks
 
 class AutoImageBlocks(AutoPipelineBlocks):
     # List of sub-block classes to choose from
@@ -101,7 +142,7 @@ class AutoImageBlocks(AutoPipelineBlocks):
     # - if none of above, runs the text2img workflow (default)
     block_trigger_inputs = ["mask", "image", None]
     # Description is extremely important for AutoPipelineBlocks
     @property
     def description(self):
         return (
             "Pipeline generates images given different types of conditions!\n"
@@ -110,207 +151,19 @@ class AutoImageBlocks(AutoPipelineBlocks):
             + " - img2img workflow is run when `image` is provided (but only when `mask` is not provided).\n"
             + " - text2img workflow is run when neither `image` nor `mask` is provided.\n"
         )
-
-# Create the blocks
-auto_blocks = AutoImageBlocks()
-# convert to pipeline
-auto_pipeline = auto_blocks.init_pipeline()
-```
-
-Now we have created an `AutoPipelineBlocks` that contains 3 sub-blocks. Notice the warning message at the top - this automatically appears in every `ModularPipelineBlocks` that contains `AutoPipelineBlocks` to remind end users that dynamic block selection happens at runtime.
-
-```py
-AutoImageBlocks(
-  Class: AutoPipelineBlocks
-
-  ====================================================================================================
-  This pipeline contains blocks that are selected at runtime based on inputs.
-  Trigger Inputs: ['mask', 'image']
-  ====================================================================================================
-
-
-  Description: Pipeline generates images given different types of conditions! 
- This is an auto pipeline block that works for text2img, img2img and inpainting tasks. - - inpaint workflow is run when `mask` is provided. - - img2img workflow is run when `image` is provided (but only when `mask` is not provided). - - text2img workflow is run when neither `image` nor `mask` is provided. - - - - Sub-Blocks: - • inpaint [trigger: mask] (TestBlock) - Description: I'm a inpaint workflow! - - • img2img [trigger: image] (TestBlock) - Description: I'm a image-to-image workflow! - - • text2img [default] (TestBlock) - Description: I'm a text-to-image workflow! - -) -``` - -Check out the documentation with `print(auto_pipeline.doc)`: - -```py ->>> print(auto_pipeline.doc) -class AutoImageBlocks - - Pipeline generates images given different types of conditions! - This is an auto pipeline block that works for text2img, img2img and inpainting tasks. - - inpaint workflow is run when `mask` is provided. - - img2img workflow is run when `image` is provided (but only when `mask` is not provided). - - text2img workflow is run when neither `image` nor `mask` is provided. - - Inputs: - - prompt (`None`, *optional*): - - image (`None`, *optional*): - - mask (`None`, *optional*): -``` - -There is a fundamental trade-off of AutoPipelineBlocks: it trades clarity for convenience. While it is really easy for packaging multiple workflows, it can become confusing without proper documentation. e.g. if we just throw a pipeline at you and tell you that it contains 3 sub-blocks and takes 3 inputs `prompt`, `image` and `mask`, and ask you to run an image-to-image workflow: if you don't have any prior knowledge on how these pipelines work, you would be pretty clueless, right? - -This pipeline we just made though, has a docstring that shows all available inputs and workflows and explains how to use each with different inputs. So it's really helpful for users. For example, it's clear that you need to pass `image` to run img2img. This is why the description field is absolutely critical for AutoPipelineBlocks. We highly recommend you to explain the conditional logic very well for each `AutoPipelineBlocks` you would make. We also recommend to always test individual pipelines first before packaging them into AutoPipelineBlocks. - -Let's run this auto pipeline with different inputs to see if the conditional logic works as described. Remember that we have added `print` in each `PipelineBlock`'s `__call__` method to print out its workflow name, so it should be easy to tell which one is running: - -```py ->>> _ = auto_pipeline(image="image", mask="mask") -running the inpaint workflow ->>> _ = auto_pipeline(image="image") -running the image-to-image workflow ->>> _ = auto_pipeline(prompt="prompt") -running the text-to-image workflow ->>> _ = auto_pipeline(image="prompt", mask="mask") -running the inpaint workflow -``` - -However, even with documentation, it can become very confusing when AutoPipelineBlocks are combined with other blocks. The complexity grows quickly when you have nested AutoPipelineBlocks or use them as sub-blocks in larger pipelines. - -Let's make another `AutoPipelineBlocks` - this one only contains one block, and it does not include `None` in its `block_trigger_inputs` (which corresponds to the default block to run when none of the trigger inputs are provided). This means this block will be skipped if the trigger input (`ip_adapter_image`) is not provided at runtime. 
-
-```py
-from diffusers.modular_pipelines import SequentialPipelineBlocks, InsertableDict
-inputs = [InputParam(name="ip_adapter_image")]
-block_fn = lambda x, y: print("running the ip-adapter workflow")
-block_ipa_cls = make_block(inputs=inputs, block_fn=block_fn, description="I'm a IP-adapter workflow!")
-
-class AutoIPAdapter(AutoPipelineBlocks):
-    block_classes = [block_ipa_cls]
-    block_names = ["ip-adapter"]
-    block_trigger_inputs = ["ip_adapter_image"]
-    @property
-    def description(self):
-        return "Run IP Adapter step if `ip_adapter_image` is provided."
-```
-
-Now let's combine these 2 auto blocks together into a `SequentialPipelineBlocks`:
-
-```py
-auto_ipa_blocks = AutoIPAdapter()
-blocks_dict = InsertableDict()
-blocks_dict["ip-adapter"] = auto_ipa_blocks
-blocks_dict["image-generation"] = auto_blocks
-all_blocks = SequentialPipelineBlocks.from_blocks_dict(blocks_dict)
-pipeline = all_blocks.init_pipeline()
-```
-
-Let's take a look: now things get more confusing. In this particular example, you could still try to explain the conditional logic in the `description` field here - there are only 4 possible execution paths so it's doable. However, since this is a `SequentialPipelineBlocks` that could contain many more blocks, the complexity can quickly get out of hand as the number of blocks increases.
-
-```py
->>> all_blocks
-SequentialPipelineBlocks(
-  Class: ModularPipelineBlocks
-
-  ====================================================================================================
-  This pipeline contains blocks that are selected at runtime based on inputs.
-  Trigger Inputs: ['image', 'mask', 'ip_adapter_image']
-  Use `get_execution_blocks()` with input names to see selected blocks (e.g. `get_execution_blocks('image')`).
-  ====================================================================================================
-
-
-  Description:
-
-
-  Sub-Blocks:
-    [0] ip-adapter (AutoIPAdapter)
-       Description: Run IP Adapter step if `ip_adapter_image` is provided.
-
+It is **very** important to include a `description` to avoid any confusion over how to run a block and what inputs are required. While [`~modular_pipelines.AutoPipelineBlocks`] is convenient, its conditional logic may be difficult to figure out if it isn't properly explained.
 
-    [1] image-generation (AutoImageBlocks)
-       Description: Pipeline generates images given different types of conditions!
-       This is an auto pipeline block that works for text2img, img2img and inpainting tasks.
-        - inpaint workflow is run when `mask` is provided.
-        - img2img workflow is run when `image` is provided (but only when `mask` is not provided).
-        - text2img workflow is run when neither `image` nor `mask` is provided.
-
-
-)
-
-```
-
-This is when the `get_execution_blocks()` method comes in handy - it basically extracts a `SequentialPipelineBlocks` that only contains the blocks that are actually run based on your inputs.
-
-Let's try some examples:
-
-`mask`: we expect it to skip the first ip-adapter since `ip_adapter_image` is not provided, and then run the inpaint for the second block.
+Create an instance of `AutoImageBlocks` and use [`~modular_pipelines.ModularPipelineBlocks.init_pipeline`] to convert it to a pipeline.
 
 ```py
->>> all_blocks.get_execution_blocks('mask')
-SequentialPipelineBlocks(
-  Class: ModularPipelineBlocks
-
-  Description:
-
-
-  Sub-Blocks:
-    [0] image-generation (TestBlock)
-       Description: I'm a inpaint workflow! 
-
-)
-```
-
-Let's also actually run the pipeline to confirm:
-
-```py
->>> _ = pipeline(mask="mask")
-skipping auto block: AutoIPAdapter
-running the inpaint workflow
+auto_blocks = AutoImageBlocks()
+auto_pipeline = auto_blocks.init_pipeline()
 ```
 
-Try a few more:
+For more complex compositions, such as nested [`~modular_pipelines.AutoPipelineBlocks`] used as sub-blocks in larger pipelines, use the [`~modular_pipelines.PipelineBlocks.get_execution_blocks`] method to extract the blocks that are actually run based on your inputs.
 
 ```py
-print(f"inputs: ip_adapter_image:")
-blocks_select = all_blocks.get_execution_blocks('ip_adapter_image')
-print(f"expected_execution_blocks: {blocks_select}")
-print(f"actual execution blocks:")
-_ = pipeline(ip_adapter_image="ip_adapter_image", prompt="prompt")
-# expect to see ip-adapter + text2img
-
-print(f"inputs: image:")
-blocks_select = all_blocks.get_execution_blocks('image')
-print(f"expected_execution_blocks: {blocks_select}")
-print(f"actual execution blocks:")
-_ = pipeline(image="image", prompt="prompt")
-# expect to see img2img
-
-print(f"inputs: prompt:")
-blocks_select = all_blocks.get_execution_blocks('prompt')
-print(f"expected_execution_blocks: {blocks_select}")
-print(f"actual execution blocks:")
-_ = pipeline(prompt="prompt")
-# expect to see text2img (prompt is not a trigger input so fallback to default)
-
-print(f"inputs: mask + ip_adapter_image:")
-blocks_select = all_blocks.get_execution_blocks('mask','ip_adapter_image')
-print(f"expected_execution_blocks: {blocks_select}")
-print(f"actual execution blocks:")
-_ = pipeline(mask="mask", ip_adapter_image="ip_adapter_image")
-# expect to see ip-adapter + inpaint
-```
-
-In summary, `AutoPipelineBlocks` is a good tool for packaging multiple workflows into a single, convenient interface and it can greatly simplify the user experience. However, always provide clear descriptions explaining the conditional logic, test individual pipelines first before combining them, and use `get_execution_blocks()` to understand runtime behavior in complex compositions.
+auto_blocks.get_execution_blocks("mask")
+```
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
index e95cdc7163b4..b3541267685d 100644
--- a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
@@ -12,63 +12,18 @@ specific language governing permissions and limitations under the License.
 
 # LoopSequentialPipelineBlocks
 
-
+[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlocks`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop, which is inherently iterative.
 
-🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes.
+This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
 
-
+## Loop wrapper
 
-`LoopSequentialPipelineBlocks` is a subclass of `ModularPipelineBlocks`. It is a multi-block that composes other blocks together in a loop, creating iterative workflows where blocks run multiple times with evolving state. 
It's particularly useful for denoising loops requiring repeated execution of the same blocks.
-
-
-
-Other types of multi-blocks include [SequentialPipelineBlocks](./sequential_pipeline_blocks.md) (for linear workflows) and [AutoPipelineBlocks](./auto_pipeline_blocks.md) (for conditional block selection). For information on creating individual blocks, see the [PipelineBlock guide](./pipeline_block.md).
-
-Additionally, like all `ModularPipelineBlocks`, `LoopSequentialPipelineBlocks` are definitions/specifications, not runnable pipelines. You need to convert them into a `ModularPipeline` to actually execute them. For information on creating and running pipelines, see the [Modular Pipeline guide](modular_pipeline.md).
-
-
-
-You could create a loop using `PipelineBlock` like this:
-
-```python
-class DenoiseLoop(PipelineBlock):
-    def __call__(self, components, state):
-        block_state = self.get_block_state(state)
-        for t in range(block_state.num_inference_steps):
-            # ... loop logic here
-            pass
-        self.set_block_state(state, block_state)
-        return components, state
-```
-
-But in this tutorial, we will focus on how to use `LoopSequentialPipelineBlocks` to create a "composable" denoising loop where you can add or remove blocks within the loop or reuse the same loop structure with different block combinations.
-
-It involves two parts: a **loop wrapper** and **loop blocks**
-
-* The **loop wrapper** (`LoopSequentialPipelineBlocks`) defines the loop structure, e.g. it defines the iteration variables, and loop configurations such as progress bar.
-
-* The **loop blocks** are basically standard pipeline blocks you add to the loop wrapper.
-   - they run sequentially for each iteration of the loop
-   - they receive the current iteration index as an additional parameter
-   - they share the same block_state throughout the entire loop
-
-Unlike regular `SequentialPipelineBlocks` where each block gets its own state, loop blocks share a single state that persists and evolves across iterations.
-
-We will build a simple loop block to demonstrate these concepts. Creating a loop block involves three steps:
-1. defining the loop wrapper class
-2. creating the loop blocks
-3. adding the loop blocks to the loop wrapper class to create the loop wrapper instance
-
-**Step 1: Define the Loop Wrapper**
+[`~modular_pipelines.LoopSequentialPipelineBlocks`] is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, define the following variables.
 
-To create a `LoopSequentialPipelineBlocks` class, you need to define:
-
-* `loop_inputs`: User input variables (equivalent to `PipelineBlock.inputs`)
-* `loop_intermediate_inputs`: Intermediate variables needed from the mutable pipeline state (equivalent to `PipelineBlock.intermediates_inputs`)
-* `loop_intermediate_outputs`: New intermediate variables this block will add to the mutable pipeline state (equivalent to `PipelineBlock.intermediates_outputs`)
-* `__call__` method: Defines the loop structure and iteration logic
-
-Here is an example of a loop wrapper:
+- `loop_inputs` are user-provided values and equivalent to [`~modular_pipelines.PipelineBlock.inputs`].
+- `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.PipelineBlock.intermediate_inputs`].
+- `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. 
It is equivalent to [`~modular_pipelines.PipelineBlock.intermediate_outputs`].
+- `__call__` method defines the loop structure and iteration logic.
 
 ```py
 import torch
@@ -93,16 +48,20 @@ class LoopWrapper(LoopSequentialPipelineBlocks):
         return components, state
 ```
 
-**Step 2: Create Loop Blocks**
+The loop wrapper can pass additional arguments, like the current iteration index, to the loop blocks.
 
-Loop blocks are standard `PipelineBlock`s, but their `__call__` method works differently:
-* It receives the iteration variable (e.g., `i`) passed by the loop wrapper
-* It works directly with `block_state` instead of pipeline state
-* No need to call `self.get_block_state()` or `self.set_block_state()`
+## Loop blocks
+
+A loop block is a [`~modular_pipelines.PipelineBlock`], but the `__call__` method behaves differently.
+
+- The `__call__` method receives the iteration variable from the loop wrapper.
+- The `__call__` method works directly with the [`~modular_pipelines.BlockState`] instead of the [`~modular_pipelines.PipelineState`].
+- The `__call__` method doesn't require retrieving or updating the [`~modular_pipelines.BlockState`].
+
+Loop blocks share the same [`~modular_pipelines.BlockState`] to allow values to accumulate and change for each iteration in the loop.
 
 ```py
 class LoopBlock(PipelineBlock):
-    # this is used to identify the model family, we won't worry about it in this example
     model_name = "test"
     @property
     def inputs(self):
@@ -119,76 +78,16 @@ class LoopBlock(PipelineBlock):
         return components, block_state
 ```
 
-**Step 3: Combine Everything**
+## LoopSequentialPipelineBlocks
 
-Finally, assemble your loop by adding the block(s) to the wrapper:
+Use the [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`] method to add the loop block to the loop wrapper to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
 
 ```py
 loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock})
 ```
 
-Now you've created a loop with one step:
-
-```py
->>> loop
-LoopWrapper(
-  Class: LoopSequentialPipelineBlocks
-
-  Description: I'm a loop!!
-
-  Sub-Blocks:
-    [0] block1 (LoopBlock)
-       Description: I'm a block used inside the `LoopWrapper` class
-
-)
-```
-
-It has two inputs: `x` (used at each step within the loop) and `num_steps` used to define the loop.
-
-```py
->>> print(loop.doc)
-class LoopWrapper
-
-  I'm a loop!!
-
-  Inputs:
-
-      x (`None`, *optional*):
-
-      num_steps (`None`, *optional*):
-
-  Outputs:
-
-      x (`None`):
-```
-
-**Running the Loop:**
-
-```py
-# run the loop
-loop_pipeline = loop.init_pipeline()
-x = loop_pipeline(num_steps=10, x=0, output="x")
-assert x == 10
-```
-
-**Adding Multiple Blocks:**
-
-We can add multiple blocks to run within each iteration. Let's run the loop block twice within each iteration:
+Add more loop blocks to run within each iteration with [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`]. This allows you to modify the blocks without changing the loop logic itself.
 
 ```py
 loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock(), "block2": LoopBlock})
-loop_pipeline = loop.init_pipeline()
-x = loop_pipeline(num_steps=10, x=0, output="x")
-assert x == 20  # Each iteration runs 2 blocks, so 10 iterations * 2 = 20
 ```
 
-**Key Differences from SequentialPipelineBlocks:**
-
-The main difference is that loop blocks share the same `block_state` across all iterations, allowing values to accumulate and evolve throughout the loop. 
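To make the accumulation behavior concrete, here is a minimal sketch of running the composed loop, reusing the `LoopWrapper` and `LoopBlock` defined above and the expected values from the earlier version of this guide:

```py
# Sketch: convert the composed loop into a runnable pipeline and execute it.
# Both blocks run on every iteration and share a single BlockState,
# so 10 iterations x 2 blocks increment `x` from 0 to 20.
loop_pipeline = loop.init_pipeline()
x = loop_pipeline(num_steps=10, x=0, output="x")
assert x == 20
```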
Loop blocks could receive additional arguments (like the current iteration index) depending on the loop wrapper's implementation, since the wrapper defines how loop blocks are called. You can easily add, remove, or reorder blocks within the loop without changing the loop logic itself.
-
-The officially supported denoising loops in Modular Diffusers are implemented using `LoopSequentialPipelineBlocks`. You can explore the actual implementation to see how these concepts work in practice:
-
-```py
-from diffusers.modular_pipelines.stable_diffusion_xl.denoise import StableDiffusionXLDenoiseStep
-StableDiffusionXLDenoiseStep()
-```
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
index 518e658506ff..95bf13adfb57 100644
--- a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
@@ -12,11 +12,11 @@ specific language governing permissions and limitations under the License.
 
 # SequentialPipelineBlocks
 
-[`SequentialPipelineBlocks`] are a multi-block type that composes other [`PipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline.
+[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline.
 
-This guide shows you how to connect two blocks into a [`SequentialPipelineBlocks`].
+This guide shows you how to connect two blocks into a [`~modular_pipelines.SequentialPipelineBlocks`].
 
-Create two [`PipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`.
+Create two [`~modular_pipelines.PipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock`, uses `batch_size` as `intermediate_inputs`.
 
 
 
@@ -101,7 +101,7 @@ class ImageEncoderBlock(PipelineBlock):
 
 Connect the two blocks by defining an [`InsertableDict`] to map the block names to the block instances. Blocks are executed in the order they're registered in `blocks_dict`.
 
-Use [`~SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`].
+Use [`~modular_pipelines.SequentialPipelineBlocks.from_blocks_dict`] to create a [`~modular_pipelines.SequentialPipelineBlocks`].
 
 ```py
 from diffusers.modular_pipelines import SequentialPipelineBlocks, InsertableDict
@@ -113,7 +113,7 @@ blocks_dict["image_encoder"] = image_encoder_block
 
 blocks = SequentialPipelineBlocks.from_blocks_dict(blocks_dict)
 ```
 
-Inspect the sub-blocks in [`SequentialPipelineBlocks`] by calling `blocks`, and for more details about the inputs and outputs, access the `docs` attribute.
+Inspect the sub-blocks in [`~modular_pipelines.SequentialPipelineBlocks`] by calling `blocks`, and for more details about the inputs and outputs, access the `doc` attribute. 
```py print(blocks) From b9327d8e2d5e1700a1f88de1f23843e2ff85a88d Mon Sep 17 00:00:00 2001 From: stevhliu Date: Fri, 18 Jul 2025 22:33:26 -0700 Subject: [PATCH 08/15] fix --- docs/source/en/_toctree.yml | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 966059ab0cca..1c33fe796a04 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -130,34 +130,10 @@ title: ModularPipeline - local: modular_diffusers/components_manager title: Components Manager - title: Modular Diffusers -- sections: - - local: using-diffusers/consisid - title: ConsisID - - local: using-diffusers/sdxl - title: Stable Diffusion XL - - local: using-diffusers/sdxl_turbo - title: SDXL Turbo - - local: using-diffusers/kandinsky - title: Kandinsky - - local: using-diffusers/omnigen - title: OmniGen - - local: using-diffusers/pag - title: PAG - - local: using-diffusers/inference_with_lcm - title: Latent Consistency Model - - local: using-diffusers/shap-e - title: Shap-E - - local: using-diffusers/diffedit - title: DiffEdit - - local: using-diffusers/inference_with_tcd_lora - title: Trajectory Consistency Distillation-LoRA - - local: using-diffusers/svd - title: Stable Video Diffusion - - local: using-diffusers/marigold_usage - title: Marigold Computer Vision - title: Specific pipeline examples -- sections: + +- title: Training + isExpanded: false + sections: - local: training/overview title: Overview - local: training/create_dataset From ca2b9b33a5aae84bb440441c9af70e6405ddf148 Mon Sep 17 00:00:00 2001 From: stevhliu Date: Tue, 22 Jul 2025 13:17:05 -0700 Subject: [PATCH 09/15] pipeline --- docs/source/en/_toctree.yml | 19 +- .../en/api/modular_diffusers/pipeline.md | 5 + .../modular_diffusers/pipeline_components.md | 6 +- .../en/modular_diffusers/modular_pipeline.md | 1214 ++--------------- 4 files changed, 171 insertions(+), 1073 deletions(-) create mode 100644 docs/source/en/api/modular_diffusers/pipeline.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 1c33fe796a04..5065216c96d4 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -275,15 +275,6 @@ isExpanded: false sections: - title: Main Classes - sections: - - local: api/modular_diffusers/pipeline_blocks - title: Pipeline blocks - - local: api/modular_diffusers/pipeline_states - title: Pipeline states - - local: api/modular_diffusers/pipeline_components - title: Components and configs - title: Modular Diffusers - - isExpanded: false sections: - local: api/configuration title: Configuration @@ -293,6 +284,16 @@ title: Outputs - local: api/quantization title: Quantization + - title: Modular + sections: + - local: api/modular_diffusers/pipeline + title: Pipeline + - local: api/modular_diffusers/pipeline_blocks + title: Blocks + - local: api/modular_diffusers/pipeline_states + title: States + - local: api/modular_diffusers/pipeline_components + title: Components and configs - title: Loaders sections: - local: api/loaders/ip_adapter diff --git a/docs/source/en/api/modular_diffusers/pipeline.md b/docs/source/en/api/modular_diffusers/pipeline.md new file mode 100644 index 000000000000..f60261ea6672 --- /dev/null +++ b/docs/source/en/api/modular_diffusers/pipeline.md @@ -0,0 +1,5 @@ +# Pipeline + +## ModularPipeline + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipeline diff --git a/docs/source/en/api/modular_diffusers/pipeline_components.md 
b/docs/source/en/api/modular_diffusers/pipeline_components.md index ee8540ac5e13..0e05009096fe 100644 --- a/docs/source/en/api/modular_diffusers/pipeline_components.md +++ b/docs/source/en/api/modular_diffusers/pipeline_components.md @@ -6,4 +6,8 @@ ## ConfigSpec -[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec \ No newline at end of file +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.ConfigSpec + +## ComponentsManager + +[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/modular_pipeline.md b/docs/source/en/modular_diffusers/modular_pipeline.md index 55182b921fdb..fcc7dbebb530 100644 --- a/docs/source/en/modular_diffusers/modular_pipeline.md +++ b/docs/source/en/modular_diffusers/modular_pipeline.md @@ -12,577 +12,177 @@ specific language governing permissions and limitations under the License. # ModularPipeline - +[`ModularPipeline`] converts [`PipelineBlock`]'s into an executable pipeline that loads models and performs the computation steps defined in a block. It is the main interface for users to run a pipeline and it is very similar to the [`DiffusionPipeline`] API. -🧪 **Experimental Feature**: Modular Diffusers is an experimental feature we are actively developing. The API may be subject to breaking changes. +The main difference is to include an expected `output` argument in the pipeline. - - -`ModularPipeline` is the main interface for end users to run pipelines in Modular Diffusers. It takes pipeline blocks and converts them into a runnable pipeline that can load models and execute the computation steps. - -In this guide, we will focus on how to build pipelines using the blocks we officially support at diffusers 🧨. We'll cover how to use predefined blocks and convert them into a `ModularPipeline` for execution. - - - -This guide shows you how to use predefined blocks. If you want to learn how to create your own pipeline blocks, see the [PipelineBlock guide](pipeline_block.md) for creating individual blocks, and the multi-block guides for connecting them together: -- [SequentialPipelineBlocks](sequential_pipeline_blocks.md) (for linear workflows) -- [LoopSequentialPipelineBlocks](loop_sequential_pipeline_blocks.md) (for iterative workflows) -- [AutoPipelineBlocks](auto_pipeline_blocks.md) (for conditional workflows) - -For information on how data flows through pipelines, see the [PipelineState and BlockState guide](modular_diffusers_states.md). - - - - -## Create ModularPipelineBlocks - -In Modular Diffusers system, you build pipelines using Pipeline blocks. Pipeline Blocks are fundamental building blocks - they define what components, inputs/outputs, and computation logics are needed. They are designed to be assembled into workflows for tasks such as image generation, video creation, and inpainting. But they are just definitions and don't actually run anything. To execute blocks, you need to put them into a `ModularPipeline`. We'll first learn how to create predefined blocks here before talking about how to run them using `ModularPipeline`. - -All pipeline blocks inherit from the base class `ModularPipelineBlocks`, including: - -- [`PipelineBlock`]: The most granular block - you define the input/output/components requirements and computation logic. -- [`SequentialPipelineBlocks`]: A multi-block composed of multiple blocks that run sequentially, passing outputs as inputs to the next block. 
-- [`LoopSequentialPipelineBlocks`]: A special type of `SequentialPipelineBlocks` that runs the same sequence of blocks multiple times (loops), typically used for iterative processes like denoising steps in diffusion models. -- [`AutoPipelineBlocks`]: A multi-block composed of multiple blocks that are selected at runtime based on the inputs. - -It is very easy to use a `ModularPipelineBlocks` officially supported in 🧨 Diffusers - -```py -from diffusers.modular_pipelines.stable_diffusion_xl import StableDiffusionXLTextEncoderStep - -text_encoder_block = StableDiffusionXLTextEncoderStep() -``` - -This is a single `PipelineBlock`. You'll see that this text encoder block uses 2 text_encoders, 2 tokenizers as well as a guider component. It takes user inputs such as `prompt` and `negative_prompt`, and return text embeddings outputs such as `prompt_embeds` and `negative_prompt_embeds`. - -```py ->>> text_encoder_block -StableDiffusionXLTextEncoderStep( - Class: PipelineBlock - Description: Text Encoder step that generate text_embeddings to guide the image generation - Components: - text_encoder (`CLIPTextModel`) - text_encoder_2 (`CLIPTextModelWithProjection`) - tokenizer (`CLIPTokenizer`) - tokenizer_2 (`CLIPTokenizer`) - guider (`ClassifierFreeGuidance`) - Configs: - force_zeros_for_empty_prompt (default: True) - Inputs: - prompt=None, prompt_2=None, negative_prompt=None, negative_prompt_2=None, cross_attention_kwargs=None, clip_skip=None - Intermediates: - - outputs: prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds -) -``` - -More commonly, you need multiple blocks to build your workflow. You can create a `SequentialPipelineBlocks` using block class presets from 🧨 Diffusers. `TEXT2IMAGE_BLOCKS` is a dict containing all the blocks needed for text-to-image generation. + + ```py +import torch from diffusers.modular_pipelines import SequentialPipelineBlocks from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS -t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) -``` - -This creates a `SequentialPipelineBlocks`. Unlike the `text_encoder_block` we saw earlier, this is a multi-block and its `sub_blocks` attribute contains a list of other blocks (text_encoder, input, set_timesteps, prepare_latents, prepare_added_con, denoise, decode). Its requirements for components, inputs, and intermediate inputs are combined from these blocks that compose it. At runtime, it executes its sub-blocks sequentially and passes the pipeline state from one block to another. - -```py ->>> t2i_blocks -SequentialPipelineBlocks( - Class: ModularPipelineBlocks - - Description: - - - Components: - text_encoder (`CLIPTextModel`) - text_encoder_2 (`CLIPTextModelWithProjection`) - tokenizer (`CLIPTokenizer`) - tokenizer_2 (`CLIPTokenizer`) - guider (`ClassifierFreeGuidance`) - scheduler (`EulerDiscreteScheduler`) - unet (`UNet2DConditionModel`) - vae (`AutoencoderKL`) - image_processor (`VaeImageProcessor`) - - Configs: - force_zeros_for_empty_prompt (default: True) - - Sub-Blocks: - [0] text_encoder (StableDiffusionXLTextEncoderStep) - Description: Text Encoder step that generate text_embeddings to guide the image generation - - [1] input (StableDiffusionXLInputStep) - Description: Input processing step that: - 1. Determines `batch_size` and `dtype` based on `prompt_embeds` - 2. 
Adjusts input tensor shapes based on `batch_size` (number of prompts) and `num_images_per_prompt` - - All input tensors are expected to have either batch_size=1 or match the batch_size - of prompt_embeds. The tensors will be duplicated across the batch dimension to - have a final batch_size of batch_size * num_images_per_prompt. - - [2] set_timesteps (StableDiffusionXLSetTimestepsStep) - Description: Step that sets the scheduler's timesteps for inference - - [3] prepare_latents (StableDiffusionXLPrepareLatentsStep) - Description: Prepare latents step that prepares the latents for the text-to-image generation process - - [4] prepare_add_cond (StableDiffusionXLPrepareAdditionalConditioningStep) - Description: Step that prepares the additional conditioning for the text-to-image generation process - - [5] denoise (StableDiffusionXLDenoiseStep) - Description: Denoise step that iteratively denoise the latents. - Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: - - `StableDiffusionXLLoopBeforeDenoiser` - - `StableDiffusionXLLoopDenoiser` - - `StableDiffusionXLLoopAfterDenoiser` - This block supports both text2img and img2img tasks. - - [6] decode (StableDiffusionXLDecodeStep) - Description: Step that decodes the denoised latents into images - -) -``` - -This is the block classes preset (`TEXT2IMAGE_BLOCKS`) we used: It is just a dictionary that maps names to ModularPipelineBlocks classes - -```py ->>> TEXT2IMAGE_BLOCKS -InsertableDict([ - 0: ('text_encoder', ), - 1: ('input', ), - 2: ('set_timesteps', ), - 3: ('prepare_latents', ), - 4: ('prepare_add_cond', ), - 5: ('denoise', ), - 6: ('decode', ) -]) -``` -When we create a `SequentialPipelineBlocks` from this preset, it instantiates each block class into actual block objects. Its `sub_blocks` attribute now contains these instantiated objects: +blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) -```py ->>> t2i_blocks.sub_blocks -InsertableDict([ - 0: ('text_encoder', ), - 1: ('input', ), - 2: ('set_timesteps', ), - 3: ('prepare_latents', ), - 4: ('prepare_add_cond', ), - 5: ('denoise', ), - 6: ('decode', ) -]) -``` +modular_repo_id = "YiYiXu/modular-loader-t2i-0704" +pipeline = blocks.init_pipeline(modular_repo_id) -Note that both the block classes preset and the `sub_blocks` attribute are `InsertableDict` objects. This is a custom dictionary that extends `OrderedDict` with the ability to insert items at specific positions. You can perform all standard dictionary operations (get, set, delete) plus insert items at any index, which is particularly useful for reordering or inserting blocks in the middle of a pipeline. 
+pipeline.load_default_components(torch_dtype=torch.float16) +pipeline.to("cuda") -**Add a block:** -```py -# BLOCKS is dict of block classes, you need to add class to it -BLOCKS.insert("block_name", BlockClass, index) -# sub_blocks attribute contains instance, add a block instance to the attribute -t2i_blocks.sub_blocks.insert("block_name", block_instance, index) +image = pipeline(prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", output="images")[0] +image.save("modular_t2i_out.png") ``` -**Remove a block:** -```py -# remove a block class from preset -BLOCKS.pop("text_encoder") -# split out a block instance on its own -text_encoder_block = t2i_blocks.sub_blocks.pop("text_encoder") -``` + + -**Swap block:** ```py -# Replace block class in preset -BLOCKS["prepare_latents"] = CustomPrepareLatents -# Replace in sub_blocks attribute using an block instance -t2i_blocks.sub_blocks["prepare_latents"] = CustomPrepareLatents() -``` - -This means you can mix-and-match blocks in very flexible ways. Let's see some real examples: +import torch +from diffusers.modular_pipelines import SequentialPipelineBlocks +from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS -**Example 1: Adding IP-Adapter to the Block Classes Preset** -Let's make a new block classes preset by insert IP-Adapter at index 0 (before the text_encoder block), and create a text-to-image pipeline with IP-Adapter support: +blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS) -```py -from diffusers.modular_pipelines.stable_diffusion_xl import StableDiffusionXLAutoIPAdapterStep -CUSTOM_BLOCKS = TEXT2IMAGE_BLOCKS.copy() -# CUSTOM_BLOCKS is now a preset including ip_adapter -CUSTOM_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0) -# create a blocks isntance from the preset -custom_blocks = SequentialPipelineBlocks.from_blocks_dict(CUSTOM_BLOCKS) -``` +modular_repo_id = "YiYiXu/modular-loader-t2i-0704" +pipeline = blocks.init_pipeline(modular_repo_id) -**Example 2: Extracting a block from a multi-block** -You can extract a block instance from the multi-block to use it independently. A common pattern is to use text_encoder to process prompts once, then reuse the text embeddings outputs to generate multiple images with different settings (schedulers, seeds, inference steps). We can do this by simply extracting the text_encoder block from the pipeline. +pipeline.load_default_components(torch_dtype=torch.float16) +pipeline.to("cuda") -```py -# this gives you StableDiffusionXLTextEncoderStep() ->>> text_encoder_blocks = t2i_blocks.sub_blocks.pop("text_encoder") ->>> text_encoder_blocks +url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png" +init_image = load_image(url) +prompt = "a dog catching a frisbee in the jungle" +image = pipeline(prompt=prompt, image=init_image, strength=0.8, output="images")[0] +image.save("modular_i2i_out.png") ``` -The multi-block now has fewer components and no longer has the `text_encoder` block. If you check its docstring `t2i_blocks.doc`, you will see that it no longer accepts `prompt` as input - you will need to pass the embeddings instead. 
+ + ```py ->>> t2i_blocks -SequentialPipelineBlocks( - Class: ModularPipelineBlocks - - Description: - - Components: - scheduler (`EulerDiscreteScheduler`) - guider (`ClassifierFreeGuidance`) - unet (`UNet2DConditionModel`) - vae (`AutoencoderKL`) - image_processor (`VaeImageProcessor`) - - Blocks: - [0] input (StableDiffusionXLInputStep) - Description: Input processing step that: - 1. Determines `batch_size` and `dtype` based on `prompt_embeds` - 2. Adjusts input tensor shapes based on `batch_size` (number of prompts) and `num_images_per_prompt` - - All input tensors are expected to have either batch_size=1 or match the batch_size - of prompt_embeds. The tensors will be duplicated across the batch dimension to - have a final batch_size of batch_size * num_images_per_prompt. - - [1] set_timesteps (StableDiffusionXLSetTimestepsStep) - Description: Step that sets the scheduler's timesteps for inference - - [2] prepare_latents (StableDiffusionXLPrepareLatentsStep) - Description: Prepare latents step that prepares the latents for the text-to-image generation process - - [3] prepare_add_cond (StableDiffusionXLPrepareAdditionalConditioningStep) - Description: Step that prepares the additional conditioning for the text-to-image generation process - - [4] denoise (StableDiffusionXLDenoiseLoop) - Description: Denoise step that iteratively denoise the latents. - Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `blocks` sequencially: - - `StableDiffusionXLLoopBeforeDenoiser` - - `StableDiffusionXLLoopDenoiser` - - `StableDiffusionXLLoopAfterDenoiser` - - - [5] decode (StableDiffusionXLDecodeStep) - Description: Step that decodes the denoised latents into images +import torch +from diffusers.modular_pipelines import SequentialPipelineBlocks +from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS +from diffusers.utils import load_image -) -``` +blocks = SequentialPipelineBlocks.from_blocks_dict(INPAINT_BLOCKS) - +modular_repo_id = "YiYiXu/modular-loader-t2i-0704" +pipeline = blocks.init_pipeline(modular_repo_id) -💡 You can find all the block classes presets we support for each model in `ALL_BLOCKS`. +pipeline.load_default_components(torch_dtype=torch.float16) +pipeline.to("cuda") -```py -# For Stable Diffusion XL -from diffusers.modular_pipelines.stable_diffusion_xl import ALL_BLOCKS -ALL_BLOCKS -# For other models... -from diffusers.modular_pipelines. import ALL_BLOCKS -``` +img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png" +mask_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-inpaint-mask.png" -Each model provides a dictionary that maps all supported tasks/techniques to their corresponding block classes presets. For SDXL, it is +init_image = load_image(img_url) +mask_image = load_image(mask_url) -```py -ALL_BLOCKS = { - "text2img": TEXT2IMAGE_BLOCKS, - "img2img": IMAGE2IMAGE_BLOCKS, - "inpaint": INPAINT_BLOCKS, - "controlnet": CONTROLNET_BLOCKS, - "ip_adapter": IP_ADAPTER_BLOCKS, - "auto": AUTO_BLOCKS, -} +prompt = "A deep sea diver floating" +image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, strength=0.85, output="images")[0] +image.save("moduar_inpaint_out.png") ``` - + + -This covers the essentials of pipeline blocks! Like we have already mentioned, **pipeline blocks are not runnable by themselves**. 
They are essentially **"definitions"** - they define the specifications and computational steps for a pipeline, but they do not contain any model states. To actually run them, you need to convert them into a `ModularPipeline` object. +This guide will show you how to create a [`ModularPipeline`] and manage the components in it. +## Creating a pipeline -## Modular Repo +There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeline from [`PipelineBlocks`] or load an existing pipeline with [`~ModularPipeline.from_pretrained`]. -To convert blocks into a runnable pipeline, you may need a repository if your blocks contain **pretrained components** (models with checkpoints that need to be loaded from the Hub). Pipeline blocks define what components they need (like a UNet, text encoder, etc.), as well as how to create them: components can be either created using **from_pretrained** method (with checkpoints) or **from_config** (initialized from scratch with default configuration, usually stateless like a guider or scheduler). +You should also initialize a [`ComponentsManager`] to handle device placement and memory and component management. -If your pipeline contains **pretrained components**, you typically need to use a repository to provide the loading specifications and metadata. +> [!TIP] +> Refer to the [ComponentsManager](./components_manager) for more details about how it can help manage components across different workflows. -`ModularPipeline` works specifically with modular repositories, which offer more flexibility in component loading compared to traditional repositories. You can find an example modular repo [here](https://huggingface.co/YiYiXu/modular-diffdiff). + + -A `DiffusionPipeline` defines `model_index.json` to configure its components. However, repositories for Modular Diffusers work with `modular_model_index.json`. Let's walk through the differences here. +Use the [`ModularPipelineBlocks.init_pipeline`] method to create a [`ModularPipeline`] from the component and configuration specifications. This method loads the *specifications* from a `modular_model_index.json` file, but it doesn't load the *models* yet. -In standard `model_index.json`, each component entry is a `(library, class)` tuple: ```py -"text_encoder": [ - "transformers", - "CLIPTextModel" -], -``` - -In `modular_model_index.json`, each component entry contains 3 elements: `(library, class, loading_specs_dict)` - -- `library` and `class`: Information about the actual component loaded in the pipeline at the time of saving (will be `null` if not loaded) -- `loading_specs_dict`: A dictionary containing all information required to load this component, including `repo`, `revision`, `subfolder`, `variant`, and `type_hint`. - -```py -"text_encoder": [ - null, # library of actual loaded component (same as in model_index.json) - null, # class of actual loaded componenet (same as in model_index.json) - { # loading specs map (unique to modular_model_index.json) - "repo": "stabilityai/stable-diffusion-xl-base-1.0", # can be a different repo - "revision": null, - "subfolder": "text_encoder", - "type_hint": [ # (library, class) for the expected component - "transformers", - "CLIPTextModel" - ], - "variant": null - } -], -``` - -Unlike standard repositories where components must be in subfolders within the same repo, modular repositories can fetch components from different repositories based on the `loading_specs_dict`. e.g. 
the `text_encoder` component will be fetched from the "text_encoder" folder in `stabilityai/stable-diffusion-xl-base-1.0` while other components come from different repositories. - - -## Creating a `ModularPipeline` from `ModularPipelineBlocks` - -Each `ModularPipelineBlocks` has an `init_pipeline` method that can initialize a `ModularPipeline` object based on its component and configuration specifications. - -Let's convert our `t2i_blocks` (which we created earlier) into a runnable `ModularPipeline`. We'll use a `ComponentsManager` to handle device placement, memory management, and component reuse automatically: +from diffusers import ComponentsManager +from diffusers.modular_pipelines import SequentialPipelineBlocks +from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS -```py -# We already have this from earlier t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) -# Now convert it to a ModularPipeline -from diffusers import ComponentsManager modular_repo_id = "YiYiXu/modular-loader-t2i-0704" components = ComponentsManager() t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=components) ``` - - -💡 **ComponentsManager** is the model registry and management system in diffusers, it track all the models in one place and let you add, remove and reuse them across different workflows in most efficient way. Without it, you'd need to manually manage GPU memory, device placement, and component sharing between workflows. See the [Components Manager guide](components_manager.md) for detailed information. - - - -The `init_pipeline()` method creates a ModularPipeline and loads component specifications from the repository's `modular_model_index.json` file, but doesn't load the actual models yet. - - -## Creating a `ModularPipeline` with `from_pretrained` + + -You can create a `ModularPipeline` from a HuggingFace Hub repository with `from_pretrained` method, as long as it's a modular repo: +The [`~ModularPipeline.from_pretrained`] method creates a [`ModularPipeline`] from a modular repository on the Hub. ```py from diffusers import ModularPipeline, ComponentsManager + components = ComponentsManager() pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-loader-t2i-0704", components_manager=components) ``` -Loading custom code is also supported: +Add the `trust_remote_code` argument to load a custom [`ModularPipeline`]. ```py from diffusers import ModularPipeline, ComponentsManager + components = ComponentsManager() modular_repo_id = "YiYiXu/modular-diffdiff-0704" diffdiff_pipeline = ModularPipeline.from_pretrained(modular_repo_id, trust_remote_code=True, components_manager=components) ``` -This modular repository contains custom code. The folder contains these files: - -``` -modular-diffdiff-0704/ -├── block.py # Custom pipeline blocks implementation -├── config.json # Pipeline configuration and auto_map -└── modular_model_index.json # Component loading specifications -``` - -The [`config.json`](https://huggingface.co/YiYiXu/modular-diffdiff-0704/blob/main/config.json) file defines a custom `DiffDiffBlocks` class and points to its implementation: - -```json -{ - "_class_name": "DiffDiffBlocks", - "auto_map": { - "ModularPipelineBlocks": "block.DiffDiffBlocks" - } -} -``` - -The `auto_map` tells the pipeline where to find the custom blocks definition - in this case, it's looking for `DiffDiffBlocks` in the `block.py` file. 
The actual `DiffDiffBlocks` class is defined in [`block.py`](https://huggingface.co/YiYiXu/modular-diffdiff-0704/blob/main/block.py) within the repository.
+
+

-When `diffdiff_pipeline.blocks` is created, it's based on the `DiffDiffBlocks` definition from the custom code in the repository, allowing you to use specialized blocks that aren't part of the standard diffusers library.
+## Loading components

-## Loading components into a `ModularPipeline`
+A [`ModularPipeline`] isn't automatically instantiated with components. It only loads the configuration and component specifications. You can load all components with [`~ModularPipeline.load_default_components`] or only load specific components with [`~ModularPipeline.load_components`].

-Unlike `DiffusionPipeline`, when you create a `ModularPipeline` instance (whether using `from_pretrained` or converting from pipeline blocks), its components aren't loaded automatically. You need to explicitly load model components using `load_default_components` or `load_components(names=..,)`:
+
+

```py
-# This will load ALL the expected components into pipeline
import torch
+
t2i_pipeline.load_default_components(torch_dtype=torch.float16)
t2i_pipeline.to("cuda")
```

-All expected components are now loaded into the pipeline. You can also partially load specific components using the `names` argument. For example, to only load unet and vae:
-
-```py
->>> t2i_pipeline.load_components(names=["unet", "vae"], torch_dtype=torch.float16)
-```
+
+

-You can inspect the pipeline's loading status by simply printing the pipeline itself. It helps you understand what components are expected to load, which ones are already loaded, how they were loaded, and what loading specs are available. Let's print out the `t2i_pipeline`:
+The example below only loads the UNet and VAE.
```py ->>> t2i_pipeline -StableDiffusionXLModularPipeline { - "_blocks_class_name": "SequentialPipelineBlocks", - "_class_name": "StableDiffusionXLModularPipeline", - "_diffusers_version": "0.35.0.dev0", - "force_zeros_for_empty_prompt": true, - "scheduler": [ - null, - null, - { - "repo": "stabilityai/stable-diffusion-xl-base-1.0", - "revision": null, - "subfolder": "scheduler", - "type_hint": [ - "diffusers", - "EulerDiscreteScheduler" - ], - "variant": null - } - ], - "text_encoder": [ - null, - null, - { - "repo": "stabilityai/stable-diffusion-xl-base-1.0", - "revision": null, - "subfolder": "text_encoder", - "type_hint": [ - "transformers", - "CLIPTextModel" - ], - "variant": null - } - ], - "text_encoder_2": [ - null, - null, - { - "repo": "stabilityai/stable-diffusion-xl-base-1.0", - "revision": null, - "subfolder": "text_encoder_2", - "type_hint": [ - "transformers", - "CLIPTextModelWithProjection" - ], - "variant": null - } - ], - "tokenizer": [ - null, - null, - { - "repo": "stabilityai/stable-diffusion-xl-base-1.0", - "revision": null, - "subfolder": "tokenizer", - "type_hint": [ - "transformers", - "CLIPTokenizer" - ], - "variant": null - } - ], - "tokenizer_2": [ - null, - null, - { - "repo": "stabilityai/stable-diffusion-xl-base-1.0", - "revision": null, - "subfolder": "tokenizer_2", - "type_hint": [ - "transformers", - "CLIPTokenizer" - ], - "variant": null - } - ], - "unet": [ - "diffusers", - "UNet2DConditionModel", - { - "repo": "RunDiffusion/Juggernaut-XL-v9", - "revision": null, - "subfolder": "unet", - "type_hint": [ - "diffusers", - "UNet2DConditionModel" - ], - "variant": "fp16" - } - ], - "vae": [ - "diffusers", - "AutoencoderKL", - { - "repo": "madebyollin/sdxl-vae-fp16-fix", - "revision": null, - "subfolder": null, - "type_hint": [ - "diffusers", - "AutoencoderKL" - ], - "variant": null - } - ] -} -``` - -You can see all the **pretrained components** that will be loaded using `from_pretrained` method are listed as entries. Each entry contains 3 elements: `(library, class, loading_specs_dict)`: - -- **`library` and `class`**: Show the actual loaded component info. If `null`, the component is not loaded yet. -- **`loading_specs_dict`**: Contains all the information needed to load the component (repo, subfolder, variant, etc.) - -In this example: -- **Loaded components**: `vae` and `unet` (their `library` and `class` fields show the actual loaded models) -- **Not loaded yet**: `scheduler`, `text_encoder`, `text_encoder_2`, `tokenizer`, `tokenizer_2` (their `library` and `class` fields are `null`, but you can see their loading specs to know where they'll be loaded from when you call `load_components()`) +import torch -You're looking at essentailly the pipeline's config dict that's synced with the `modular_model_index.json` from the repository you used during `init_pipeline()` - it takes the loading specs that match the pipeline's component requirements. +t2i_pipeline.load_components(names=["unet", "vae"], torch_dtype=torch.float16) +``` -For example, if your pipeline needs a `text_encoder` component, it will include the loading spec for `text_encoder` from the modular repo during the `init_pipeline`. If the pipeline doesn't need a component (like `controlnet` in a basic text-to-image pipeline), that component won't be included even if it exists in the modular repo. + + -There are also a few properties that can provide a quick summary of component loading status: +Print the pipeline to inspect the loaded pretrained components. 
```py
-# All components expected by the pipeline
->>> t2i_pipeline.component_names
-['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'guider', 'scheduler', 'unet', 'vae', 'image_processor']
-
-# Components that are not loaded yet (will be loaded with from_pretrained)
->>> t2i_pipeline.null_component_names
-['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler']
-
-# Components that will be loaded from pretrained models
->>> t2i_pipeline.pretrained_component_names
-['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler', 'unet', 'vae']
-
-# Components that are created with default config (no repo needed)
->>> t2i_pipeline.config_component_names
-['guider', 'image_processor']
+t2i_pipeline
```

-From config components (like `guider` and `image_processor`) are not included in the pipeline output above because they don't need loading specs - they're already initialized during pipeline creation. You can see this because they're not listed in `null_component_names`.
+This should match the `modular_model_index.json` file from the modular repository the pipeline was initialized from. It shows the loading specifications that match the pipeline's component requirements. If a pipeline doesn't need a component, that component won't be included even if it exists in the modular repository.

-## Modifying Loading Specs
+To modify where components are loaded from, edit the `modular_model_index.json` file in the repository and change it to your desired loading path. The example below loads the UNet from a different repository.

-When you call `pipeline.load_components(names=)` or `pipeline.load_default_components()`, it uses the loading specs from the modular repository's `modular_model_index.json`. You can change where components are loaded from by modifying the `modular_model_index.json` in the repository. Just find the file on the Hub and click edit - you can change any field in the loading specs: `repo`, `subfolder`, `variant`, `revision`, etc.

-```py
-# Original spec in modular_model_index.json
+```json
+# original
"unet": [
  null, null,
  {
@@ -592,7 +192,7 @@ When you call `pipeline.load_components(names=)` or `pipeline.load_default_compo

 }
 ]

-# Modified spec - changed repo, subfolder, and variant
+# modified
"unet": [
  null, null,
  {
@@ -603,93 +203,80 @@ When you call `pipeline.load_components(names=)` or `pipeline.load_default_compo

 ]
```

-Now if you create a pipeline using the same blocks and updated repository, it will by default load from the new repository.
+### Component loading status
+
+The pipeline properties below provide more information about which components are loaded.
+
+Use `component_names` to return all expected components.

```py
-pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-loader-t2i-0704", components_manager=components)
-pipeline.load_components(names="unet")
+t2i_pipeline.component_names
+['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'guider', 'scheduler', 'unet', 'vae', 'image_processor']
```

+Use `null_component_names` to return components that aren't loaded yet. Load these components with [`~ModularPipeline.load_components`] or [`~ModularPipeline.load_default_components`].

-## Updating components in a `ModularPipeline`
+```py
+t2i_pipeline.null_component_names
+['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler']
+```

-Similar to `DiffusionPipeline`, you can load components separately to replace the default ones in the pipeline.
In Modular Diffusers, the approach depends on the component type:
+Use `pretrained_component_names` to return components that will be loaded from pretrained models.

-- **Pretrained components** (`default_creation_method='from_pretrained'`): Must use `ComponentSpec` to load them to update the existing one.
-- **Config components** (`default_creation_method='from_config'`): These are components that don't need loading specs - they're created during pipeline initialization with default config. To update them, you can either pass the object directly or pass a ComponentSpec directly.
+```py
+t2i_pipeline.pretrained_component_names
+['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler', 'unet', 'vae']
+```

-
+Use `config_component_names` to return components that are created with the default config (not loaded from a modular repository). Components from a config aren't included because they are already initialized during pipeline creation. This is why they aren't listed in `null_component_names`.

-💡 **Component Type Changes**: The component type (pretrained vs config-based) can change when you update components. These types are initially defined in pipeline blocks' `expected_components` field using `ComponentSpec` with `default_creation_method`. See the [Customizing Guidance Techniques](#customizing-guidance-techniques) section for examples of how this works in practice.
+```py
+t2i_pipeline.config_component_names
+['guider', 'image_processor']
+```

-
+## Updating components

-`ComponentSpec` defines how to create or load components and can actually create them using its `create()` method (for ConfigMixin objects) or `load()` method (wrapper around `from_pretrained()`). When a component is loaded with a ComponentSpec, it gets tagged with a unique ID that encodes its creation parameters, allowing you to always extract the original specification using `ComponentSpec.from_component()`.
+How a component is updated depends on whether it is a *pretrained component* or a *config component*.

-Now let's look at how to update pretrained components in practice:
+> [!WARNING]
+> A component may change from pretrained to config when updating a component. The component type is initially defined in a block's `expected_components` field.

-So instead of
+A pretrained component requires updating with [`ComponentSpec`] whereas a config component can be updated by either passing the object directly or with [`ComponentSpec`]. The [`ComponentSpec`] shows `default_creation_method="from_pretrained"` for a pretrained component.

-```py
-from diffusers import UNet2DConditionModel
-import torch
-unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16", torch_dtype=torch.float16)
-```

-You should load your model like this
+To update a pretrained component, create a [`ComponentSpec`] with the name of the component and where to load it from. Use the [`~ComponentSpec.load`] method to load the component. The [`ComponentSpec`] shows `default_creation_method="from_config"` for a config component.
```py
from diffusers import ComponentSpec, UNet2DConditionModel
+
unet_spec = ComponentSpec(name="unet",type_hint=UNet2DConditionModel, repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16")
-unet2 = unet_spec.load(torch_dtype=torch.float16)
+unet = unet_spec.load(torch_dtype=torch.float16)
```

-The key difference is that the second unet retains its loading specs, so you can extract the spec and recreate the unet:
+The [`~ModularPipeline.update_components`] method replaces the component with a new one.

```py
-# component -> spec
->>> spec = ComponentSpec.from_component("unet", unet2)
->>> spec
-ComponentSpec(name='unet', type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>, description=None, config=None, repo='stabilityai/stable-diffusion-xl-base-1.0', subfolder='unet', variant='fp16', revision=None, default_creation_method='from_pretrained')
-# spec -> component
->>> unet2_recreatd = spec.load(torch_dtype=torch.float16)
+t2i_pipeline.update_components(unet=unet)
```

-To replace the unet in the pipeline
+When a component is updated, the loading specifications are also updated in the pipeline config.

-```
-t2i_pipeline.update_components(unet=unet2)
-```
+### Component extraction and modification

-Not only is the `unet` component swapped, but its loading specs are also updated from "RunDiffusion/Juggernaut-XL-v9" to "stabilityai/stable-diffusion-xl-base-1.0" in pipeline config. This means that if you save the pipeline now and load it back with `from_pretrained`, the new pipeline will by default load the SDXL original unet.
+When you use [`ComponentSpec.load`], the new component maintains its loading specifications. This makes it possible to extract the specification and recreate the component.

+```py
+spec = ComponentSpec.from_component("unet", unet)
+spec
ComponentSpec(name='unet', type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>, description=None, config=None, repo='stabilityai/stable-diffusion-xl-base-1.0', subfolder='unet', variant='fp16', revision=None, default_creation_method='from_pretrained')
+unet_recreated = spec.load(torch_dtype=torch.float16)
```
-
->>> t2i_pipeline
-StableDiffusionXLModularPipeline {
-  ...
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel",
-    {
-      "repo": "stabilityai/stable-diffusion-xl-base-1.0",
-      "revision": null,
-      "subfolder": "unet",
-      "type_hint": [
-        "diffusers",
-        "UNet2DConditionModel"
-      ],
-      "variant": "fp16"
-    }
-  ],
-  ...
-}
-```
-
-💡 **Modifying Component Specs**: You can get a copy of the current component spec from the pipeline using `get_component_spec()`. This makes it easy to modify the spec and updating components.
+The [`~ModularPipeline.get_component_spec`] method gets a copy of the current component specification to modify or update.

```py
->>> unet_spec = t2i_pipeline.get_component_spec("unet")
->>> unet_spec
+unet_spec = t2i_pipeline.get_component_spec("unet")
+unet_spec
ComponentSpec(
    name='unet',
    type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>,
@@ -699,539 +286,40 @@ ComponentSpec(
    default_creation_method='from_pretrained'
)

-# Modify the spec to load from a different repository
->>> unet_spec.repo = "stabilityai/stable-diffusion-xl-base-1.0"
-
-# Load the component with the modified spec
->>> unet = unet_spec.load(torch_dtype=torch.float16)
-```
-
-
-## Customizing Guidance Techniques
-
-Guiders are implementations of different [classifier-free guidance](https://huggingface.co/papers/2207.12598) techniques that can be applied during the denoising process to improve generation quality, control, and adherence to prompts.
They work by steering the model predictions towards desired directions and away from undesired directions. In diffusers, guiders are implemented as subclasses of `BaseGuidance`. They can easily be integrated into modular pipelines and provide a flexible way to enhance generation quality without modifying the underlying diffusion models. - -**ClassifierFreeGuidance (CFG)** is the first and most common guidance technique, used in all our standard pipelines. We also offer many other guidance techniques from the latest research in this area - **PerturbedAttentionGuidance (PAG)**, **SkipLayerGuidance (SLG)**, **SmoothedEnergyGuidance (SEG)**, and others that can provide better results for specific use cases. - -This section demonstrates how to use guiders using the component updating methods we just learned. Since `BaseGuidance` components are stateless (similar to schedulers), they are typically created with default configurations during pipeline initialization using `default_creation_method='from_config'`. This means they don't require loading specs from the repository - you won't see guider listed in `modular_model_index.json` files. - -Let's take a look at the default guider configuration: - -```py ->>> t2i_pipeline.get_component_spec("guider") -ComponentSpec(name='guider', type_hint=, description=None, config=FrozenDict([('guidance_scale', 7.5), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['start', 'guidance_rescale', 'stop', 'use_original_formulation'])]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config') -``` - -As you can see, the guider is configured to use `ClassifierFreeGuidance` with default parameters and `default_creation_method='from_config'`, meaning it's created during pipeline initialization rather than loaded from a repository. Let's verify this, here we run `init_pipeline()` without a modular repo, and there it is, a guider with the default configuration we just saw - - -```py ->>> pipeline = t2i_blocks.init_pipeline() ->>> pipeline.guider -ClassifierFreeGuidance { - "_class_name": "ClassifierFreeGuidance", - "_diffusers_version": "0.35.0.dev0", - "guidance_rescale": 0.0, - "guidance_scale": 7.5, - "start": 0.0, - "stop": 1.0, - "use_original_formulation": false -} -``` - -#### Modify Parameters of the Same Guider Type - -To change parameters of the same guider type (e.g., adjusting the `guidance_scale` for CFG), you have two options: - -**Option 1: Use ComponentSpec.create() method** - -You just need to pass the parameter with the new value to override the default one. - -```python ->>> guider_spec = t2i_pipeline.get_component_spec("guider") ->>> guider = guider_spec.create(guidance_scale=10) ->>> t2i_pipeline.update_components(guider=guider) -``` - -**Option 2: Pass ComponentSpec directly** - -Update the spec directly and pass it to `update_components()`. 
- -```python ->>> guider_spec = t2i_pipeline.get_component_spec("guider") ->>> guider_spec.config["guidance_scale"] = 10 ->>> t2i_pipeline.update_components(guider=guider_spec) -``` - -Both approaches produce the same result: -```python ->>> t2i_pipeline.guider -ClassifierFreeGuidance { - "_class_name": "ClassifierFreeGuidance", - "_diffusers_version": "0.35.0.dev0", - "guidance_rescale": 0.0, - "guidance_scale": 10, - "start": 0.0, - "stop": 1.0, - "use_original_formulation": false -} -``` - -#### Switch to a Different Guider Type - -Switching between guidance techniques is as simple as passing a guider object of that technique: - -```py -from diffusers import LayerSkipConfig, PerturbedAttentionGuidance -config = LayerSkipConfig(indices=[2, 9], fqn="mid_block.attentions.0.transformer_blocks", skip_attention=False, skip_attention_scores=True, skip_ff=False) -guider = PerturbedAttentionGuidance( - guidance_scale=5.0, perturbed_guidance_scale=2.5, perturbed_guidance_config=config -) -t2i_pipeline.update_components(guider=guider) -``` - -Note that you will get a warning about changing the guider type, which is expected: +# modify to load from a different repository +unet_spec.repo = "stabilityai/stable-diffusion-xl-base-1.0" +# load component with modified spec +unet = unet_spec.load(torch_dtype=torch.float16) ``` -ModularPipeline.update_components: adding guider with new type: PerturbedAttentionGuidance, previous type: ClassifierFreeGuidance -``` - - -- For `from_config` components (like guiders, schedulers): You can pass an object of required type OR pass a ComponentSpec directly (which calls `create()` under the hood) -- For `from_pretrained` components (like models): You must use ComponentSpec to ensure proper tagging and loading +## Modular repository - +A repository is required if the pipeline blocks use *pretrained components*. The repository supplies loading specifications and metadata. -Let's verify that the guider has been updated: +[`ModularPipeline`] specifically requires *modular repositories* (see [example repository](https://huggingface.co/YiYiXu/modular-diffdiff)) which are more flexible than a typical repository. It contains a `modular_model_index.json` file containing the following 3 elements. -```py ->>> t2i_pipeline.guider -PerturbedAttentionGuidance { - "_class_name": "PerturbedAttentionGuidance", - "_diffusers_version": "0.35.0.dev0", - "guidance_rescale": 0.0, - "guidance_scale": 5.0, - "perturbed_guidance_config": { - "dropout": 1.0, - "fqn": "mid_block.attentions.0.transformer_blocks", - "indices": [ - 2, - 9 - ], - "skip_attention": false, - "skip_attention_scores": true, - "skip_ff": false - }, - "perturbed_guidance_layers": null, - "perturbed_guidance_scale": 2.5, - "perturbed_guidance_start": 0.01, - "perturbed_guidance_stop": 0.2, - "start": 0.0, - "stop": 1.0, - "use_original_formulation": false -} +- `library` and `class` shows which library the component was loaded from and it's class. If `null`, the component hasn't been loaded yet. +- `loading_specs_dict` contains the information required to load the component such as the repository and subfolder it is loaded from. -``` +Unlike standard repositories, a modular repository can fetch components from different repositories based on the `loading_specs_dict`. Components don't need to exist in the same repository. -The component spec has also been updated to reflect the new guider type: +A modular repository may contain custom code for loading a [`ModularPipeline`]. 
This allows you to use specialized blocks that aren't native to Diffusers. -```py ->>> t2i_pipeline.get_component_spec("guider") -ComponentSpec(name='guider', type_hint=, description=None, config=FrozenDict([('guidance_scale', 5.0), ('perturbed_guidance_scale', 2.5), ('perturbed_guidance_start', 0.01), ('perturbed_guidance_stop', 0.2), ('perturbed_guidance_layers', None), ('perturbed_guidance_config', LayerSkipConfig(indices=[2, 9], fqn='mid_block.attentions.0.transformer_blocks', skip_attention=False, skip_attention_scores=True, skip_ff=False, dropout=1.0)), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['perturbed_guidance_start', 'use_original_formulation', 'perturbed_guidance_layers', 'stop', 'start', 'guidance_rescale', 'perturbed_guidance_stop']), ('_class_name', 'PerturbedAttentionGuidance'), ('_diffusers_version', '0.35.0.dev0')]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config') ``` - -The "guider" is still a `from_config` component: is still not included in the pipeline config and will not be saved into the `modular_model_index.json`. - -```py ->>> assert "guider" not in t2i_pipeline.config +modular-diffdiff-0704/ +├── block.py # Custom pipeline blocks implementation +├── config.json # Pipeline configuration and auto_map +└── modular_model_index.json # Component loading specifications ``` -However, you can change it to a `from_pretrained` component, which allows you to upload your customized guider to the Hub and load it into your pipeline. - -#### Loading Custom Guiders from Hub - -If you already have a guider saved on the Hub and a `modular_model_index.json` with the loading spec for that guider, it will automatically be changed to a `from_pretrained` component during pipeline initialization. - -For example, this `modular_model_index.json` includes loading specs for the guider: +The [config.json](https://huggingface.co/YiYiXu/modular-diffdiff-0704/blob/main/config.json) file contains an `auto_map` key that points to where a custom block is defined in `block.py`. ```json { - "guider": [ - null, - null, - { - "repo": "YiYiXu/modular-loader-t2i-guider", - "revision": null, - "subfolder": "pag_guider", - "type_hint": [ - "diffusers", - "PerturbedAttentionGuidance" - ], - "variant": null - } - ] + "_class_name": "DiffDiffBlocks", + "auto_map": { + "ModularPipelineBlocks": "block.DiffDiffBlocks" + } } -``` - -When you use this repository to create a pipeline with the same blocks (that originally configured guider as a `from_config` component), the guider becomes a `from_pretrained` component. This means it doesn't get created during initialization, and after you call `load_default_components()`, it loads based on the spec - resulting in the PAG guider instead of the default CFG. - -```py -t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-doc-guider") -assert t2i_pipeline.guider is None # Not created during init -t2i_pipeline.load_default_components() -t2i_pipeline.guider # Now loaded as PAG guider -``` - -#### Upload Custom Guider to Hub for Easy Loading & Sharing - -Now let's see how we can share the guider on the Hub and change it to a `from_pretrained` component. - -```py -guider.push_to_hub("YiYiXu/modular-loader-t2i-guider", subfolder="pag_guider") -``` - -Voilà! Now you have a subfolder called `pag_guider` on that repository. - -You have a few options to make this guider available in your pipeline: - -1. 
**Directly modify the `modular_model_index.json`** to add a loading spec for the guider by pointing to a folder containing the desired guider config. - -2. **Use the `update_components` method** to change it to a `from_pretrained` component for your pipeline. This is easier if you just want to try it out with different repositories. - -Let's use the second approach and change our guider_spec to use `from_pretrained` as the default creation method and update the loading spec to use this subfolder we just created: - -```python -guider_spec = t2i_pipeline.get_component_spec("guider") -guider_spec.default_creation_method="from_pretrained" -guider_spec.repo="YiYiXu/modular-loader-t2i-guider" -guider_spec.subfolder="pag_guider" -pag_guider = guider_spec.load() -t2i_pipeline.update_components(guider=pag_guider) -``` - -You will get a warning about changing the creation method: - -``` -ModularPipeline.update_components: changing the default_creation_method of guider from from_config to from_pretrained. -``` - -Now not only the `guider` component and its component_spec are updated, but so is the pipeline config. - -If you want to change the default behavior for future pipelines, you can push the updated pipeline to the Hub. This way, when others use your repository, they'll get the PAG guider by default. However, this is optional - you don't have to do this if you just want to experiment locally. - -```py -t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider") -``` - - - - -Experiment with different techniques and parameters to find what works best for your specific use case! You can find all the guider class we support [here](TODO: API doc) - -Additionally, you can write your own guider implementations, for example, CFG Zero* combined with Skip Layer Guidance, and they should be compatible out-of-the-box with modular diffusers! - - - -## Running a `ModularPipeline` - -The API to run the `ModularPipeline` is very similar to how you would run a regular `DiffusionPipeline`: - -```py ->>> image = pipeline(prompt="a cat", num_inference_steps=15, output="images")[0] -``` - -There are a few key differences though: -1. You can also pass a `PipelineState` object directly to the pipeline instead of individual arguments -2. If you do not specify the `output` argument, it returns the `PipelineState` object -3. You can pass a list as `output`, e.g. `pipeline(... output=["images", "latents"])` will return a dictionary containing both the generated image and the final denoised latents - -Under the hood, `ModularPipeline`'s `__call__` method is a wrapper around the pipeline blocks' `__call__` method: it creates a `PipelineState` object and populates it with user inputs, then returns the output to the user based on the `output` argument. It also ensures that all pipeline-level config and components are exposed to all pipeline blocks by preparing and passing a `components` input. - - - -You can inspect the docstring of a `ModularPipeline` to check what arguments the pipeline accepts and how to specify the `output` you want. It will list all available outputs (basically everything in the intermediate pipeline state) so you can choose from the list. - -```py -t2i_pipeline.doc -``` - -**Important**: It is important to always check the docstring because arguments can be different from standard pipelines that you're familar with. For example, in Modular Diffusers we standardized controlnet image input as `control_image`, but regular pipelines have inconsistencies over the names, e.g. 
controlnet text-to-image uses `image` while SDXL controlnet img2img uses `control_image`. - -**Note**: The `output` list might be longer than you expected - it includes everything in the intermediate state that you can choose to return. Most of the time, you'll just want `output="images"` or `output="latents"`. - - - -#### Text-to-Image, Image-to-Image, and Inpainting - -These are minimum inference examples for basic tasks: text-to-image, image-to-image, and inpainting. The process to create different pipelines is the same - only difference is the block classes presets. The inference is also more or less same to standard pipelines, but please always check `.doc` for correct input names and remember to pass `output="images"`. - - - - - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS - -# create pipeline from official blocks preset -blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) - -modular_repo_id = "YiYiXu/modular-loader-t2i-0704" -pipeline = blocks.init_pipeline(modular_repo_id) - -pipeline.load_default_components(torch_dtype=torch.float16) -pipeline.to("cuda") - -# run pipeline, need to pass a "output=images" argument -image = pipeline(prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", output="images")[0] -image.save("modular_t2i_out.png") -``` - - - - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS - -# create pipeline from blocks preset -blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS) - -modular_repo_id = "YiYiXu/modular-loader-t2i-0704" -pipeline = blocks.init_pipeline(modular_repo_id) - -pipeline.load_default_components(torch_dtype=torch.float16) -pipeline.to("cuda") - -url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png" -init_image = load_image(url) -prompt = "a dog catching a frisbee in the jungle" -image = pipeline(prompt=prompt, image=init_image, strength=0.8, output="images")[0] -image.save("modular_i2i_out.png") -``` - - - - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS -from diffusers.utils import load_image - -# create pipeline from blocks preset -blocks = SequentialPipelineBlocks.from_blocks_dict(INPAINT_BLOCKS) - -modular_repo_id = "YiYiXu/modular-loader-t2i-0704" -pipeline = blocks.init_pipeline(modular_repo_id) - -pipeline.load_default_components(torch_dtype=torch.float16) -pipeline.to("cuda") - -img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png" -mask_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-inpaint-mask.png" - -init_image = load_image(img_url) -mask_image = load_image(mask_url) - -prompt = "A deep sea diver floating" -image = pipeline(prompt=prompt, image=init_image, mask_image=mask_image, strength=0.85, output="images")[0] -image.save("moduar_inpaint_out.png") -``` - - - - -#### ControlNet - -For ControlNet, we provide one auto block you can place at the `denoise` step. Let's create it and inspect it to see what it tells us. 
- - - -💡 **How to explore new tasks**: When you want to figure out how to do a specific task in Modular Diffusers, it is a good idea to start by checking what block classes presets we offer in `ALL_BLOCKS`. Then create the block instance and inspect it - it will show you the required components, description, and sub-blocks. This is crucial for understanding what each block does and what it needs. - - - -```py ->>> from diffusers.modular_pipelines.stable_diffusion_xl import ALL_BLOCKS ->>> ALL_BLOCKS["controlnet"] -InsertableDict([ - 0: ('denoise', ) -]) ->>> controlnet_blocks = ALL_BLOCKS["controlnet"]["denoise"]() ->>> controlnet_blocks -StableDiffusionXLAutoControlnetStep( - Class: SequentialPipelineBlocks - - ==================================================================================================== - This pipeline contains blocks that are selected at runtime based on inputs. - Trigger Inputs: {'mask', 'control_mode', 'control_image', 'controlnet_cond'} - Use `get_execution_blocks()` with input names to see selected blocks (e.g. `get_execution_blocks('mask')`). - ==================================================================================================== - - - Description: Controlnet auto step that prepare the controlnet input and denoise the latents. It works for both controlnet and controlnet_union and supports text2img, img2img and inpainting tasks. (it should be replace at 'denoise' step) - - - Components: - controlnet (`ControlNetUnionModel`) - control_image_processor (`VaeImageProcessor`) - scheduler (`EulerDiscreteScheduler`) - unet (`UNet2DConditionModel`) - guider (`ClassifierFreeGuidance`) - - Sub-Blocks: - [0] controlnet_input (StableDiffusionXLAutoControlNetInputStep) - Description: Controlnet Input step that prepare the controlnet input. - This is an auto pipeline block that works for both controlnet and controlnet_union. - (it should be called right before the denoise step) - `StableDiffusionXLControlNetUnionInputStep` is called to prepare the controlnet input when `control_mode` and `control_image` are provided. - - `StableDiffusionXLControlNetInputStep` is called to prepare the controlnet input when `control_image` is provided. - if neither `control_mode` nor `control_image` is provided, step will be skipped. - - [1] controlnet_denoise (StableDiffusionXLAutoControlNetDenoiseStep) - Description: Denoise step that iteratively denoise the latents with controlnet. This is a auto pipeline block that using controlnet for text2img, img2img and inpainting tasks.This block should not be used without a controlnet_cond input - `StableDiffusionXLInpaintControlNetDenoiseStep` (inpaint_controlnet_denoise) is used when mask is provided. - `StableDiffusionXLControlNetDenoiseStep` (controlnet_denoise) is used when mask is not provided but controlnet_cond is provided. - If neither mask nor controlnet_cond are provided, step will be skipped. - -) -``` - - - -💡 **Auto Blocks**: This is first time we meet a Auto Blocks! `AutoPipelineBlocks` automatically adapt to your inputs by combining multiple workflows with conditional logic. This is why one convenient block can work for all tasks and controlnet types. See the [Auto Blocks Guide](./auto_pipeline_blocks.md) for more details. - - - -The block shows us it has two steps (prepare inputs + denoise) and supports all tasks with both controlnet and controlnet union. Most importantly, it tells us to place it at the 'denoise' step. 
Let's do exactly that: - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS, StableDiffusionXLAutoControlnetStep -from diffusers.utils import load_image - -# create pipeline from blocks preset -blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) - -# these two lines applies controlnet -controlnet_blocks = StableDiffusionXLAutoControlnetStep() -blocks.sub_blocks["denoise"] = controlnet_blocks -``` - -Before we convert the blocks into a pipeline and load its components, let's inspect the blocks and its docs again to make sure it was assembled correctly. You should be able to see that `controlnet` and `control_image_processor` are now listed as `Components`, so we should initialize the pipeline with a repo that contains desired loading specs for these 2 components. - -```py -# make sure to a modular_repo including controlnet -modular_repo_id = "YiYiXu/modular-demo-auto" -pipeline = blocks.init_pipeline(modular_repo_id) -pipeline.load_default_components(torch_dtype=torch.float16) -pipeline.to("cuda") - -# generate -canny_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" -) -image = pipeline( - prompt="a bird", controlnet_conditioning_scale=0.5, control_image=canny_image, output="images" -)[0] -image.save("modular_control_out.png") -``` - -#### IP-Adapter - -**Challenge time!** Before we show you how to apply IP-adapter, try doing it yourself! Use the same process we just walked you through with ControlNet: check the official blocks preset, inspect the block instance and docstring `.doc`, and adapt a regular IP-adapter example to modular. - -Let's walk through the steps: - -1. Check blocks preset - -```py ->>> from diffusers.modular_pipelines.stable_diffusion_xl import ALL_BLOCKS ->>> ALL_BLOCKS["ip_adapter"] -InsertableDict([ - 0: ('ip_adapter', ) -]) -``` - -2. inspect the block & doc - -``` ->>> from diffusers.modular_pipelines.stable_diffusion_xl import StableDiffusionXLAutoIPAdapterStep ->>> ip_adapter_blocks = StableDiffusionXLAutoIPAdapterStep() ->>> ip_adapter_blocks -StableDiffusionXLAutoIPAdapterStep( - Class: AutoPipelineBlocks - - ==================================================================================================== - This pipeline contains blocks that are selected at runtime based on inputs. - Trigger Inputs: {'ip_adapter_image'} - Use `get_execution_blocks()` with input names to see selected blocks (e.g. `get_execution_blocks('ip_adapter_image')`). - ==================================================================================================== - - - Description: Run IP Adapter step if `ip_adapter_image` is provided. This step should be placed before the 'input' step. - - - - Components: - image_encoder (`CLIPVisionModelWithProjection`) - feature_extractor (`CLIPImageProcessor`) - unet (`UNet2DConditionModel`) - guider (`ClassifierFreeGuidance`) - - Sub-Blocks: - • ip_adapter [trigger: ip_adapter_image] (StableDiffusionXLIPAdapterStep) - Description: IP Adapter step that prepares ip adapter image embeddings. - Note that this step only prepares the embeddings - in order for it to work correctly, you need to load ip adapter weights into unet via ModularPipeline.load_ip_adapter() and pipeline.set_ip_adapter_scale(). 
- See [ModularIPAdapterMixin](https://huggingface.co/docs/diffusers/api/loaders/ip_adapter#diffusers.loaders.ModularIPAdapterMixin) for more details - -) -``` -3. follow the instruction to build - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS - -# create pipeline from official blocks preset -blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS) - -# insert ip_adapter_blocks before the input step as instructed -blocks.sub_blocks.insert("ip_adapter", ip_adapter_blocks, 1) - -# inspec the blocks before you convert it into pipelines, -# and make sure to use a repo that contains the loading spec for all components -# for ip-adapter, you need image_encoder & feature_extractor -modular_repo_id = "YiYiXu/modular-demo-auto" -pipeline = blocks.init_pipeline(modular_repo_id) - -pipeline.load_default_components(torch_dtype=torch.float16) -pipeline.load_ip_adapter( - "h94/IP-Adapter", - subfolder="sdxl_models", - weight_name="ip-adapter_sdxl.bin" -) -pipeline.set_ip_adapter_scale(0.8) -pipeline.to("cuda") -``` - -4. adapt an example to modular - -We are using [this one](https://huggingface.co/docs/diffusers/using-diffusers/ip_adapter?ipadapter-variants=IP-Adapter+Plus#ip-adapter) from our IP-Adapter doc! - - -```py -from diffusers.utils import load_image -image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png") -image = pipeline( - prompt="a polar bear sitting in a chair drinking a milkshake", - ip_adapter_image=image, - negative_prompt="deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality", - output="images" -)[0] -image.save("modular_ipa_out.png") -``` - - +``` \ No newline at end of file From 6bf17ddedec0e090ef46bd64bb1cb5a721c5797e Mon Sep 17 00:00:00 2001 From: stevhliu Date: Tue, 22 Jul 2025 17:57:12 -0700 Subject: [PATCH 10/15] guiders --- docs/source/en/_toctree.yml | 4 + .../en/api/modular_diffusers/guiders.md | 39 ++++ docs/source/en/modular_diffusers/guiders.md | 175 ++++++++++++++++++ 3 files changed, 218 insertions(+) create mode 100644 docs/source/en/api/modular_diffusers/guiders.md create mode 100644 docs/source/en/modular_diffusers/guiders.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 5065216c96d4..3939eafeeb1a 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -130,6 +130,8 @@ title: ModularPipeline - local: modular_diffusers/components_manager title: Components Manager + - local: modular_diffusers/guiders + title: Guiders - title: Training isExpanded: false @@ -294,6 +296,8 @@ title: States - local: api/modular_diffusers/pipeline_components title: Components and configs + - local: api/modular_diffusers/guiders + title: Guiders - title: Loaders sections: - local: api/loaders/ip_adapter diff --git a/docs/source/en/api/modular_diffusers/guiders.md b/docs/source/en/api/modular_diffusers/guiders.md new file mode 100644 index 000000000000..a24eb7220749 --- /dev/null +++ b/docs/source/en/api/modular_diffusers/guiders.md @@ -0,0 +1,39 @@ +# Guiders + +Guiders are components in Modular Diffusers that control how the diffusion process is guided during generation. They implement various guidance techniques to improve generation quality and control. 
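The snippet below is a minimal sketch of how a guider is typically created and attached to a pipeline. It assumes an existing [`ModularPipeline`] named `pipeline` and that `ClassifierFreeGuidance` is importable from the top-level `diffusers` package, consistent with how other guiders are imported in these docs.

```py
from diffusers import ClassifierFreeGuidance

# create a guider with a custom guidance scale (example value, not a recommended default)
guider = ClassifierFreeGuidance(guidance_scale=7.5)

# swap it into an existing ModularPipeline (assumed to be already created and loaded)
pipeline.update_components(guider=guider)
```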
## BaseGuidance

[[autodoc]] diffusers.guiders.guider_utils.BaseGuidance

## ClassifierFreeGuidance

[[autodoc]] diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance

## ClassifierFreeZeroStarGuidance

[[autodoc]] diffusers.guiders.classifier_free_zero_star_guidance.ClassifierFreeZeroStarGuidance

## SkipLayerGuidance

[[autodoc]] diffusers.guiders.skip_layer_guidance.SkipLayerGuidance

## SmoothedEnergyGuidance

[[autodoc]] diffusers.guiders.smoothed_energy_guidance.SmoothedEnergyGuidance

## PerturbedAttentionGuidance

[[autodoc]] diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance

## AdaptiveProjectedGuidance

[[autodoc]] diffusers.guiders.adaptive_projected_guidance.AdaptiveProjectedGuidance

## AutoGuidance

[[autodoc]] diffusers.guiders.auto_guidance.AutoGuidance

## TangentialClassifierFreeGuidance

[[autodoc]] diffusers.guiders.tangential_classifier_free_guidance.TangentialClassifierFreeGuidance
diff --git a/docs/source/en/modular_diffusers/guiders.md b/docs/source/en/modular_diffusers/guiders.md
new file mode 100644
index 000000000000..2c7eddf5c7b8
--- /dev/null
+++ b/docs/source/en/modular_diffusers/guiders.md
@@ -0,0 +1,175 @@
+

# Guiders

[Classifier-free guidance](https://huggingface.co/papers/2207.12598) steers model generation toward outputs that better match a prompt and is commonly used to improve generation quality, control, and adherence to prompts. There are different types of guidance methods, and in Diffusers, they are known as *guiders*. Like blocks, it is easy to switch and use different guiders for different use cases without rewriting the pipeline.

This guide will show you how to switch guiders, adjust guider parameters, and load and share them on the Hub.

## Switching guiders

[`ClassifierFreeGuidance`] is the default guider and is created when a pipeline is initialized with [`~ModularPipelineBlocks.init_pipeline`]. It is created with `from_config`, which means it doesn't require loading specifications from a modular repository. A guider won't be listed in `modular_model_index.json`.

Use [`~ModularPipeline.get_component_spec`] to inspect a guider.

```py
t2i_pipeline.get_component_spec("guider")
ComponentSpec(name='guider', type_hint=<class 'diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance'>, description=None, config=FrozenDict([('guidance_scale', 7.5), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['start', 'guidance_rescale', 'stop', 'use_original_formulation'])]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config')
```

Switch to a different guider by passing the new guider to [`~ModularPipeline.update_components`].

> [!TIP]
> Changing guiders will return some text letting you know you're changing the guider type.
> ```bash
> ModularPipeline.update_components: adding guider with new type: PerturbedAttentionGuidance, previous type: ClassifierFreeGuidance
> ```

```py
from diffusers import LayerSkipConfig, PerturbedAttentionGuidance

config = LayerSkipConfig(indices=[2, 9], fqn="mid_block.attentions.0.transformer_blocks", skip_attention=False, skip_attention_scores=True, skip_ff=False)
guider = PerturbedAttentionGuidance(
    guidance_scale=5.0, perturbed_guidance_scale=2.5, perturbed_guidance_config=config
)
t2i_pipeline.update_components(guider=guider)
```

Use [`~ModularPipeline.get_component_spec`] again to verify the guider type is different.
```py
t2i_pipeline.get_component_spec("guider")
ComponentSpec(name='guider', type_hint=<class 'diffusers.guiders.perturbed_attention_guidance.PerturbedAttentionGuidance'>, description=None, config=FrozenDict([('guidance_scale', 5.0), ('perturbed_guidance_scale', 2.5), ('perturbed_guidance_start', 0.01), ('perturbed_guidance_stop', 0.2), ('perturbed_guidance_layers', None), ('perturbed_guidance_config', LayerSkipConfig(indices=[2, 9], fqn='mid_block.attentions.0.transformer_blocks', skip_attention=False, skip_attention_scores=True, skip_ff=False, dropout=1.0)), ('guidance_rescale', 0.0), ('use_original_formulation', False), ('start', 0.0), ('stop', 1.0), ('_use_default_values', ['perturbed_guidance_start', 'use_original_formulation', 'perturbed_guidance_layers', 'stop', 'start', 'guidance_rescale', 'perturbed_guidance_stop']), ('_class_name', 'PerturbedAttentionGuidance'), ('_diffusers_version', '0.35.0.dev0')]), repo=None, subfolder=None, variant=None, revision=None, default_creation_method='from_config')
```

## Loading custom guiders

Guiders that are already saved on the Hub with a `modular_model_index.json` file are treated as `from_pretrained` components instead of `from_config` components.

```json
{
  "guider": [
    null,
    null,
    {
      "repo": "YiYiXu/modular-loader-t2i-guider",
      "revision": null,
      "subfolder": "pag_guider",
      "type_hint": [
        "diffusers",
        "PerturbedAttentionGuidance"
      ],
      "variant": null
    }
  ]
}
```

The guider is only created after calling [`~ModularPipeline.load_default_components`] based on the loading specification in `modular_model_index.json`.

```py
t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-doc-guider")
# not created during init
assert t2i_pipeline.guider is None
t2i_pipeline.load_default_components()
# loaded as PAG guider
t2i_pipeline.guider
```


## Changing guider parameters

The guider parameters can be adjusted with either the [`~ComponentSpec.create`] method or the [`~ModularPipeline.update_components`] method. The example below changes the `guidance_scale` value.



```py
guider_spec = t2i_pipeline.get_component_spec("guider")
guider = guider_spec.create(guidance_scale=10)
t2i_pipeline.update_components(guider=guider)
```



```py
guider_spec = t2i_pipeline.get_component_spec("guider")
guider_spec.config["guidance_scale"] = 10
t2i_pipeline.update_components(guider=guider_spec)
```



## Uploading custom guiders

Call the [`~utils.PushToHubMixin.push_to_hub`] method on a custom guider to share it on the Hub.

```py
guider.push_to_hub("YiYiXu/modular-loader-t2i-guider", subfolder="pag_guider")
```

To make this guider available to the pipeline, either modify the `modular_model_index.json` file or use the [`~ModularPipeline.update_components`] method.



Edit the `modular_model_index.json` file and add a loading specification for the guider by pointing to a folder containing the guider config.

```json
{
  "guider": [
    "diffusers",
    "PerturbedAttentionGuidance",
    {
      "repo": "YiYiXu/modular-loader-t2i-guider",
      "revision": null,
      "subfolder": "pag_guider",
      "type_hint": [
        "diffusers",
        "PerturbedAttentionGuidance"
      ],
      "variant": null
    }
  ]
}
```



Change the [`~ComponentSpec.default_creation_method`] to `"from_pretrained"` and use [`~ModularPipeline.update_components`] to update the guider and component specifications as well as the pipeline config.

> [!TIP]
> Changing the creation method will return some text letting you know you're changing the creation type to `from_pretrained`.
+> ```bash +> ModularPipeline.update_components: changing the default_creation_method of guider from from_config to from_pretrained. +> ``` + +```py +guider_spec = t2i_pipeline.get_component_spec("guider") +guider_spec.default_creation_method="from_pretrained" +guider_spec.repo="YiYiXu/modular-loader-t2i-guider" +guider_spec.subfolder="pag_guider" +pag_guider = guider_spec.load() +t2i_pipeline.update_components(guider=pag_guider) +``` + +To make it the default guider for a pipeline, call [`~utils.PushToHubMixin.push_to_hub`]. This is an optional step and not necessary if you only want to experiment locally. + +```py +t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider") +``` + + + \ No newline at end of file From 3d17125d2f174ec7bb8bb638d8b2e78f67ac29d6 Mon Sep 17 00:00:00 2001 From: stevhliu Date: Wed, 23 Jul 2025 13:45:30 -0700 Subject: [PATCH 11/15] components manager --- docs/source/en/_toctree.yml | 2 +- .../modular_diffusers/pipeline_components.md | 6 +- .../modular_diffusers/auto_pipeline_blocks.md | 6 +- .../modular_diffusers/components_manager.md | 508 ++++-------------- .../loop_sequential_pipeline_blocks.md | 6 +- .../modular_diffusers_states.md | 2 +- .../en/modular_diffusers/modular_pipeline.md | 35 +- docs/source/en/modular_diffusers/overview.md | 6 +- .../sequential_pipeline_blocks.md | 4 +- 9 files changed, 143 insertions(+), 432 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 3939eafeeb1a..37b4bc9b05d5 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -129,7 +129,7 @@ - local: modular_diffusers/modular_pipeline title: ModularPipeline - local: modular_diffusers/components_manager - title: Components Manager + title: ComponentsManager - local: modular_diffusers/guiders title: Guiders diff --git a/docs/source/en/api/modular_diffusers/pipeline_components.md b/docs/source/en/api/modular_diffusers/pipeline_components.md index 0e05009096fe..2d8e10aef6d8 100644 --- a/docs/source/en/api/modular_diffusers/pipeline_components.md +++ b/docs/source/en/api/modular_diffusers/pipeline_components.md @@ -10,4 +10,8 @@ ## ComponentsManager -[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager \ No newline at end of file +[[autodoc]] diffusers.modular_pipelines.components_manager.ComponentsManager + +## InsertableDict + +[[autodoc]] diffusers.modular_pipelines.modular_pipeline_utils.InsertableDict \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md index b17766ecf5e1..fec5473be3d8 100644 --- a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md +++ b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md @@ -16,7 +16,7 @@ specific language governing permissions and limitations under the License. This guide shows how to create [`~modular_pipelines.AutoPipelineBlocks`]. -Create three [``~modular_pipelines.PipelineBlocks`] for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline. +Create three [`~modular_pipelines.PipelineBlock`] for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline. @@ -155,14 +155,14 @@ class AutoImageBlocks(AutoPipelineBlocks): It is **very** important to include a `description` to avoid any confusion over how to run a block and what inputs are required. 
While [`~modular_pipelines.AutoPipelineBlocks`] are convenient, its conditional logic may be difficult to figure out if it isn't properly explained.

Create an instance of `AutoImageBlocks` and use [`~modular_pipelines.ModularPipelineBlocks.init_pipeline`] to convert it to a pipeline.

```py
auto_blocks = AutoImageBlocks()
auto_pipeline = auto_blocks.init_pipeline()
```

For more complex compositions, where [`~modular_pipelines.AutoPipelineBlocks`] are nested as sub-blocks in larger pipelines, use the [`~modular_pipelines.SequentialPipelineBlocks.get_execution_blocks`] method to extract the block that is actually run based on your input.

```py
auto_blocks.get_execution_blocks("mask")
```
diff --git a/docs/source/en/modular_diffusers/components_manager.md b/docs/source/en/modular_diffusers/components_manager.md
index 15b6c66b9b06..565938b39443 100644
--- a/docs/source/en/modular_diffusers/components_manager.md
+++ b/docs/source/en/modular_diffusers/components_manager.md
@@ -10,118 +10,123 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Components Manager
+# ComponentsManager

-🧪 **Experimental Feature**: This is an experimental feature we are actively developing. The API may be subject to breaking changes.
+The [`ComponentsManager`] is a model registry and management system for Modular Diffusers. It adds and tracks models, stores useful metadata (model size, device placement, adapters), prevents duplicate model instances, and supports offloading.

-The Components Manager is a central model registry and management system in diffusers. It lets you add models then reuse them across multiple pipelines and workflows. It tracks all models in one place with useful metadata such as model size, device placement and loaded adapters (LoRA, IP-Adapter). It has mechanisms in place to prevent duplicate model instances, enables memory-efficient sharing. Most significantly, it offers offloading that works across pipelines — unlike regular DiffusionPipeline offloading (i.e. `enable_model_cpu_offload` and `enable_sequential_cpu_offload`) which is limited to one pipeline with predefined sequences, the Components Manager automatically manages your device memory across all your models and workflows.
+This guide will show you how to use [`ComponentsManager`] to manage components and device memory.

+## Add a component

-## Basic Operations

-Let's start with the most basic operations.
First, create a Components Manager:
+The [`ComponentsManager`] should be created alongside a [`ModularPipeline`] in either [`~ModularPipeline.from_pretrained`] or [`~ModularPipelineBlocks.init_pipeline`].

> [!TIP]
> The `collection` parameter is optional but makes it easier to organize and manage components.



```py
from diffusers import ModularPipeline, ComponentsManager

comp = ComponentsManager()
pipe = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test1")
```



```py
from diffusers import ComponentsManager
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import TEXT2IMAGE_BLOCKS

t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)

modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
components = ComponentsManager()
t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=components)
```



Components are only loaded and registered when using [`~ModularPipeline.load_components`] or [`~ModularPipeline.load_default_components`]. The example below uses [`~ModularPipeline.load_default_components`] to load the first pipeline's components, then creates a second pipeline that reuses all of them and assigns them to a different collection.

```py
pipe.load_default_components()
pipe2 = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test2")
```

Use the [`~ModularPipeline.null_component_names`] property to identify any components that need to be loaded, retrieve them with [`~ComponentsManager.get_components_by_names`], and then call [`~ModularPipeline.update_components`] to add the missing components.

```py
pipe2.null_component_names
['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'image_encoder', 'unet', 'vae', 'scheduler', 'controlnet']

comp_dict = comp.get_components_by_names(names=pipe2.null_component_names)
pipe2.update_components(**comp_dict)
```

To add individual components, use the [`~ComponentsManager.add`] method. This registers a component with a unique identifier.

```py
from diffusers import AutoModel

text_encoder = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder")
component_id = comp.add("text_encoder", text_encoder)
comp
```

Use [`~ComponentsManager.remove`] to remove a component using its ID.

```py
comp.remove("text_encoder_139917733042864")
```
+```py
+pipe2.null_component_names
+['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'image_encoder', 'unet', 'vae', 'scheduler', 'controlnet']

-When you try to add the same object twice, the manager will warn you and return the existing ID:
+comp_dict = comp.get_components_by_names(names=pipe2.null_component_names)
+pipe2.update_components(**comp_dict)
+```
+
+To add individual components, use the [`~ComponentsManager.add`] method. This registers a component with a unique identifier.

```py
->>> comp.add("text_encoder", text_encoder)
-'text_encoder_139917733042864'
->>> comp.add("text_encoder", text_encoder)
-ComponentsManager: component 'text_encoder' already exists as 'text_encoder_139917733042864'
-'text_encoder_139917733042864'
+from diffusers import AutoModel
+
+text_encoder = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder")
+component_id = comp.add("text_encoder", text_encoder)
+comp
```

-Even if you add the same object under a different name, it will still be detected as a duplicate:
+Use [`~ComponentsManager.remove`] to remove a component using its id.

```py
->>> comp.add("clip", text_encoder)
-ComponentsManager: adding component 'clip' as 'clip_139917733042864', but it is duplicate of 'text_encoder_139917733042864'
-To remove a duplicate, call `components_manager.remove('<component_id>')`.
-'clip_139917733042864'
+comp.remove("text_encoder_139917733042864")
```

-However, there's a more subtle case where duplicate detection becomes tricky. When you load the same model into different objects, the manager can't detect duplicates unless you use `ComponentSpec`. For example:
+## Retrieve a component
+
+The [`ComponentsManager`] provides several methods to retrieve registered components.
+
+### get_one
+
+The [`~ComponentsManager.get_one`] method returns a single component and supports pattern matching for the `name` parameter. If multiple components match, [`~ComponentsManager.get_one`] raises an error.
+
+| Pattern | Example | Description |
+|-------------|----------------------------------|-------------------------------------------|
+| exact | `comp.get_one(name="unet")` | exact name match |
+| wildcard | `comp.get_one(name="unet*")` | names starting with "unet" |
+| exclusion | `comp.get_one(name="!unet")` | exclude components named "unet" |
+| or | `comp.get_one(name="unet|vae")` | name is "unet" or "vae" |
+
+[`~ComponentsManager.get_one`] also filters components by the `collection` or `load_id` arguments.

```py
->>> text_encoder_2 = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder")
->>> comp.add("text_encoder", text_encoder_2)
-'text_encoder_139917732983664'
+comp.get_one(name="unet", collection="sdxl")
```

-This creates a problem - you now have two copies of the same model consuming double the memory:
+### get_components_by_names
+
+The [`~ComponentsManager.get_components_by_names`] method accepts a list of names and returns a dictionary mapping names to components. This is especially useful with a [`ModularPipeline`] since they provide lists of required component names and the returned dictionary can be passed directly to [`~ModularPipeline.update_components`].
```py
+component_dict = comp.get_components_by_names(names=["text_encoder", "unet", "vae"])
+{"text_encoder": component1, "unet": component2, "vae": component3}
```

-We recommend using `ComponentSpec` to load your models. Models loaded with `ComponentSpec` get tagged with a unique ID that encodes their loading parameters, allowing the Components Manager to detect when different objects represent the same underlying checkpoint:
+## Duplicate detection
+
+It is recommended to load model components with [`ComponentSpec`] so that each component is assigned a unique id that encodes its loading parameters. This allows [`ComponentsManager`] to automatically detect and prevent duplicate model instances even when different objects represent the same underlying checkpoint.

```py
from diffusers import AutoModel, ComponentSpec, ComponentsManager
from transformers import CLIPTextModel
+
comp = ComponentsManager()

# Create ComponentSpec for the first text encoder
spec = ComponentSpec(name="text_encoder", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder", type_hint=AutoModel)

-# Create ComponentSpec for a duplicate text encoder (it is same checkpoint, from same repo/subfolder)
+# Create ComponentSpec for a duplicate text encoder (it is the same checkpoint, from the same repo/subfolder)
spec_duplicated = ComponentSpec(name="text_encoder_duplicated", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder", type_hint=CLIPTextModel)

# Load and add both components - the manager will detect they're the same model
@@ -129,42 +134,36 @@ comp.add("text_encoder", spec.load())
comp.add("text_encoder_duplicated", spec_duplicated.load())
```

-Now the manager detects the duplicate and warns you:
+This returns a warning with instructions for removing the duplicate.

-```out
+```py
ComponentsManager: adding component 'text_encoder_duplicated_139917580682672', but it has duplicate load_id 'stabilityai/stable-diffusion-xl-base-1.0|text_encoder|null|null' with existing components: text_encoder_139918506246832. To remove a duplicate, call `components_manager.remove('<component_id>')`.
'text_encoder_duplicated_139917580682672'
```

-Both models now show the same `load_id`, making it clear they're the same model:
+You could also add a component without using [`ComponentSpec`], and duplicate detection still works in most cases, even if you're adding the same component under a different name.
+
+However, [`ComponentsManager`] can't detect duplicates when you load the same component into different objects.
In this case, you should load a model with [`ComponentSpec`]. ```py ->>> comp -Components: -====================================================================================================================================================================================================== -Models: ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Name_ID | Class | Device: act(exec) | Dtype | Size (GB) | Load ID | Collection ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -text_encoder_139918506246832 | CLIPTextModel | cpu | torch.float32 | 0.46 | stabilityai/stable-diffusion-xl-base-1.0|text_encoder|null|null | N/A -text_encoder_duplicated_139917580682672 | CLIPTextModel | cpu | torch.float32 | 0.46 | stabilityai/stable-diffusion-xl-base-1.0|text_encoder|null|null | N/A ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Additional Component Info: -================================================== +text_encoder_2 = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder") +comp.add("text_encoder", text_encoder_2) +'text_encoder_139917732983664' ``` ## Collections -Collections are labels you can assign to components for better organization and management. You add a component under a collection by passing the `collection=` parameter when you add the component to the manager, i.e. `add(name, component, collection=...)`. Within each collection, only one component per name is allowed - if you add a second component with the same name, the first one is automatically removed. +Collections are labels you can assign to components for better organization and management. Add a component to a collection with the `collection` argument in [`~ComponentsManager.add`]. -Here's how collections work in practice: +Only one component per name is allowed in each collection. Adding a second component with the same name automatically removes the first component. 
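+
+For instance, a minimal sketch of this replacement behavior using plain [`~ComponentsManager.add`] calls (the checkpoints are the same ones used in the [`ComponentSpec`] snippet below, which is the recommended approach):
+
+```py
+from diffusers import AutoModel, ComponentsManager
+
+comp = ComponentsManager()
+
+unet_a = AutoModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
+unet_b = AutoModel.from_pretrained("RunDiffusion/Juggernaut-XL-v9", subfolder="unet", variant="fp16")
+
+comp.add("unet", unet_a, collection="sdxl")
+# same name and same collection - the first UNet is removed automatically
+comp.add("unet", unet_b, collection="sdxl")
+```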
```py
+from diffusers import AutoModel, ComponentSpec, ComponentsManager
+
comp = ComponentsManager()

-# Create ComponentSpec for the first UNet (SDXL base)
+# Create ComponentSpec for the first UNet
spec = ComponentSpec(name="unet", repo="stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", type_hint=AutoModel)
-# Create ComponentSpec for a different UNet (Juggernaut-XL)
+# Create ComponentSpec for a different UNet
spec2 = ComponentSpec(name="unet", repo="RunDiffusion/Juggernaut-XL-v9", subfolder="unet", type_hint=AutoModel, variant="fp16")

# Add both UNets to the same collection - the second one will replace the first
@@ -172,343 +171,20 @@ comp.add("unet", spec.load(), collection="sdxl")
comp.add("unet", spec2.load(), collection="sdxl")
```

-The manager automatically removes the old UNet and adds the new one:
-
-```out
-ComponentsManager: removing existing unet from collection 'sdxl': unet_139917723891888
-'unet_139917723893136'
-```
-
-Only one UNet remains in the collection:
-
-```py
->>> comp
-Components:
-====================================================================================================================================================================
-Models:
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-Name_ID | Class | Device: act(exec) | Dtype | Size (GB) | Load ID | Collection
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-unet_139917723893136 | UNet2DConditionModel | cpu | torch.float32 | 9.56 | RunDiffusion/Juggernaut-XL-v9|unet|fp16|null | sdxl
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-Additional Component Info:
-==================================================
-```
-
-For example, in node-based systems, you can mark all models loaded from one node with the same collection label, automatically replace models when user loads new checkpoints under same name, batch delete all models in a collection when a node is removed.
-
-## Retrieving Components
-
-The Components Manager provides several methods to retrieve registered components.
-
-The `get_one()` method returns a single component and supports pattern matching for the `name` parameter. You can use:
- exact matches like `comp.get_one(name="unet")`
- wildcards like `comp.get_one(name="unet*")` for components starting with "unet"
- exclusion patterns like `comp.get_one(name="!unet")` to exclude components named "unet"
- OR patterns like `comp.get_one(name="unet|vae")` to match either "unet" OR "vae".
-
-Optionally, You can add collection and load_id as filters e.g. `comp.get_one(name="unet", collection="sdxl")`. If multiple components match, `get_one()` throws an error.
-
-Another useful method is `get_components_by_names()`, which takes a list of names and returns a dictionary mapping names to components. This is particularly helpful with modular pipelines since they provide lists of required component names, and the returned dictionary can be directly passed to `pipeline.update_components()`.
- -```py -# Get components by name list -component_dict = comp.get_components_by_names(names=["text_encoder", "unet", "vae"]) -# Returns: {"text_encoder": component1, "unet": component2, "vae": component3} -``` - -## Using Components Manager with Modular Pipelines - -The Components Manager integrates seamlessly with Modular Pipelines. All you need to do is pass a Components Manager instance to `from_pretrained()` or `init_pipeline()` with an optional `collection` parameter: - -```py -from diffusers import ModularPipeline, ComponentsManager -comp = ComponentsManager() -pipe = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test1") -``` - -By default, modular pipelines don't load components immediately, so both the pipeline and Components Manager start empty: - -```py ->>> comp -Components: -================================================== -No components registered. -================================================== -``` - -When you load components on the pipeline, they are automatically registered in the Components Manager: - -```py ->>> pipe.load_components(names="unet") ->>> comp -Components: -============================================================================================================================================================== -Models: --------------------------------------------------------------------------------------------------------------------------------------------------------------- -Name_ID | Class | Device: act(exec) | Dtype | Size (GB) | Load ID | Collection --------------------------------------------------------------------------------------------------------------------------------------------------------------- -unet_139917726686304 | UNet2DConditionModel | cpu | torch.float32 | 9.56 | SG161222/RealVisXL_V4.0|unet|null|null | test1 --------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Additional Component Info: -================================================== -``` - -Now let's load all default components and then create a second pipeline that reuses all components from the first one. We pass the same Components Manager to the second pipeline but with a different collection: - -```py -# Load all default components ->>> pipe.load_default_components() - -# Create a second pipeline using the same Components Manager but with a different collection ->>> pipe2 = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test2") -``` - -As mentioned earlier, `ModularPipeline` has a property `null_component_names` that returns a list of component names it needs to load. 
We can conveniently use this list with the `get_components_by_names` method on the Components Manager: +This makes it convenient to work with node-based systems because you can: -```py -# Get the list of components that pipe2 needs to load ->>> pipe2.null_component_names -['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'image_encoder', 'unet', 'vae', 'scheduler', 'controlnet'] - -# Retrieve all required components from the Components Manager ->>> comp_dict = comp.get_components_by_names(names=pipe2.null_component_names) - -# Update the pipeline with the retrieved components ->>> pipe2.update_components(**comp_dict) -``` - -The warnings that follow are expected and indicate that the Components Manager is correctly identifying that these components already exist and will be reused rather than creating duplicates: - -```out -ComponentsManager: component 'text_encoder' already exists as 'text_encoder_139917586016400' -ComponentsManager: component 'text_encoder_2' already exists as 'text_encoder_2_139917699973424' -ComponentsManager: component 'tokenizer' already exists as 'tokenizer_139917580599504' -ComponentsManager: component 'tokenizer_2' already exists as 'tokenizer_2_139915763443904' -ComponentsManager: component 'image_encoder' already exists as 'image_encoder_139917722468304' -ComponentsManager: component 'unet' already exists as 'unet_139917580609632' -ComponentsManager: component 'vae' already exists as 'vae_139917722459040' -ComponentsManager: component 'scheduler' already exists as 'scheduler_139916266559408' -ComponentsManager: component 'controlnet' already exists as 'controlnet_139917722454432' -``` - - -The pipeline is now fully loaded: - -```py -# null_component_names return empty list, meaning everything are loaded ->>> pipe2.null_component_names -[] -``` - -No new components were added to the Components Manager - we're reusing everything. 
All models are now associated with both `test1` and `test2` collections, showing that these components are shared across multiple pipelines: -```py ->>> comp -Components: -======================================================================================================================================================================================== -Models: ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Name_ID | Class | Device: act(exec) | Dtype | Size (GB) | Load ID | Collection ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -text_encoder_139917586016400 | CLIPTextModel | cpu | torch.float32 | 0.46 | SG161222/RealVisXL_V4.0|text_encoder|null|null | test1 - | | | | | | test2 -text_encoder_2_139917699973424 | CLIPTextModelWithProjection | cpu | torch.float32 | 2.59 | SG161222/RealVisXL_V4.0|text_encoder_2|null|null | test1 - | | | | | | test2 -unet_139917580609632 | UNet2DConditionModel | cpu | torch.float32 | 9.56 | SG161222/RealVisXL_V4.0|unet|null|null | test1 - | | | | | | test2 -controlnet_139917722454432 | ControlNetModel | cpu | torch.float32 | 4.66 | diffusers/controlnet-canny-sdxl-1.0|null|null|null | test1 - | | | | | | test2 -vae_139917722459040 | AutoencoderKL | cpu | torch.float32 | 0.31 | SG161222/RealVisXL_V4.0|vae|null|null | test1 - | | | | | | test2 -image_encoder_139917722468304 | CLIPVisionModelWithProjection | cpu | torch.float32 | 6.87 | h94/IP-Adapter|sdxl_models/image_encoder|null|null | test1 - | | | | | | test2 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Other Components: ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -ID | Class | Collection ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -tokenizer_139917580599504 | CLIPTokenizer | test1 - | | test2 -scheduler_139916266559408 | EulerDiscreteScheduler | test1 - | | test2 -tokenizer_2_139915763443904 | CLIPTokenizer | test1 - | | test2 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Additional Component Info: -================================================== -``` +- Mark all models loaded from one node with the `collection` label. +- Automatically replace models when new checkpoints are loaded under the same name. +- Batch delete all models in a collection when a node is removed. +## Offloading -## Automatic Memory Management - -The Components Manager provides a global offloading strategy across all models, regardless of which pipeline is using them: +The [`~ComponentsManager.enable_auto_cpu_offload`] method is a global offloading strategy that works across all models regardless of which pipeline is using them. Once enabled, you don't need to worry about device placement if you add or remove components. 
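+
+For example, a rough sketch of a typical setup (using the demo repository from earlier in this guide; the bare call on its own is shown below):
+
+```py
+from diffusers import ModularPipeline, ComponentsManager
+
+comp = ComponentsManager()
+comp.enable_auto_cpu_offload(device="cuda")
+
+# models loaded through this pipeline are registered with the manager
+# and participate in auto offloading
+pipe = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test1")
+pipe.load_default_components()
+```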
```py comp.enable_auto_cpu_offload(device="cuda") ``` -When enabled, all models start on CPU. The manager moves models to the device right before they're used and moves other models back to CPU when GPU memory runs low. You can set your own rules for which models to offload first. This works smoothly as you add or remove components. Once it's on, you don't need to worry about device placement - you can focus on your workflow. - - - -## Practical Example: Building Modular Workflows with Component Reuse - -Now that we've covered the basics of the Components Manager, let's walk through a practical example that shows how to build workflows in a modular setting and use the Components Manager to reuse components across multiple pipelines. This example demonstrates the true power of Modular Diffusers by working with multiple pipelines that can share components. - -In this example, we'll generate latents from a text-to-image pipeline, then refine them with an image-to-image pipeline. - -Let's create a modular text-to-image workflow by separating it into three workflows: `text_blocks` for encoding prompts, `t2i_blocks` for generating latents, and `decoder_blocks` for creating final images. - -```py -import torch -from diffusers.modular_pipelines import SequentialPipelineBlocks -from diffusers.modular_pipelines.stable_diffusion_xl import ALL_BLOCKS - -# Create modular blocks and separate text encoding and decoding steps -t2i_blocks = SequentialPipelineBlocks.from_blocks_dict(ALL_BLOCKS["text2img"]) -text_blocks = t2i_blocks.sub_blocks.pop("text_encoder") -decoder_blocks = t2i_blocks.sub_blocks.pop("decode") -``` - -Now we will convert them into runnalbe pipelines and set up the Components Manager with auto offloading and organize components under a "t2i" collection - -Since we now have 3 different workflows that share components, we create a separate pipeline that serves as a dedicated loader to load all the components, register them to the component manager, and then reuse them across different workflows. 
- -```py -from diffusers import ComponentsManager, ModularPipeline - -# Set up Components Manager with auto offloading -components = ComponentsManager() -components.enable_auto_cpu_offload(device="cuda") - -# Create a new pipeline to load the components -t2i_repo = "YiYiXu/modular-demo-auto" -t2i_loader_pipe = ModularPipeline.from_pretrained(t2i_repo, components_manager=components, collection="t2i") - -# convert the 3 blocks into pipelines and attach the same components manager to all 3 -text_node = text_blocks.init_pipeline(t2i_repo, components_manager=components) -decoder_node = decoder_blocks.init_pipeline(t2i_repo, components_manager=components) -t2i_pipe = t2i_blocks.init_pipeline(t2i_repo, components_manager=components) -``` - -Load all components into the loader pipeline, they should all be automatically registered to Components Manager under the "t2i" collection: - -```py -# Load all components (including IP-Adapter and ControlNet for later use) -t2i_loader_pipe.load_default_components(torch_dtype=torch.float16) -``` - -Now distribute the loaded components to each pipeline: - -```py -# Get VAE for decoder (using get_one since there's only one) -vae = components.get_one(load_id="SG161222/RealVisXL_V4.0|vae|null|null") -decoder_node.update_components(vae=vae) - -# Get text components for text node (using get_components_by_names for multiple components) -text_components = components.get_components_by_names(text_node.null_component_names) -text_node.update_components(**text_components) - -# Get remaining components for t2i pipeline -t2i_components = components.get_components_by_names(t2i_pipe.null_component_names) -t2i_pipe.update_components(**t2i_components) -``` - -Now we can generate images using our modular workflow: - -```py -# Generate text embeddings -prompt = "an astronaut" -text_embeddings = text_node(prompt=prompt, output=["prompt_embeds","negative_prompt_embeds", "pooled_prompt_embeds", "negative_pooled_prompt_embeds"]) - -# Generate latents and decode to image -generator = torch.Generator(device="cuda").manual_seed(0) -latents_t2i = t2i_pipe(**text_embeddings, num_inference_steps=25, generator=generator, output="latents") -image = decoder_node(latents=latents_t2i, output="images")[0] -image.save("modular_part2_t2i.png") -``` - -Let's add a LoRA: - -```py -# Load LoRA weights ->>> t2i_loader_pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy_face") ->>> components -Components: -============================================================================================================================================================ -... -Additional Component Info: -================================================== - -unet: - Adapters: ['toy_face'] -``` - -You can see that the Components Manager tracks adapters metadata for all models it manages, and in our case, only Unet has lora loaded. This means we can reuse existing text embeddings. 
- -```py -# Generate with LoRA (reusing existing text embeddings) -generator = torch.Generator(device="cuda").manual_seed(0) -latents_lora = t2i_pipe(**text_embeddings, num_inference_steps=25, generator=generator, output="latents") -image = decoder_node(latents=latents_lora, output="images")[0] -image.save("modular_part2_lora.png") -``` - - -Now let's create a refiner pipeline that reuses components from our text-to-image workflow: - -```py -# Create refiner blocks (removing image_encoder and decode since we work with latents) -refiner_blocks = SequentialPipelineBlocks.from_blocks_dict(ALL_BLOCKS["img2img"]) -refiner_blocks.sub_blocks.pop("image_encoder") -refiner_blocks.sub_blocks.pop("decode") - -# Create refiner pipeline with different repo and collection, -# Attach the same component manager to it -refiner_repo = "YiYiXu/modular_refiner" -refiner_pipe = refiner_blocks.init_pipeline(refiner_repo, components_manager=components, collection="refiner") -``` - -We pass the **same Components Manager** (`components`) to the refiner pipeline, but with a **different collection** (`"refiner"`). This allows the refiner to access and reuse components from the "t2i" collection while organizing its own components (like the refiner UNet) under the "refiner" collection. - -```py -# Load only the refiner UNet (different from t2i UNet) -refiner_pipe.load_components(names="unet", torch_dtype=torch.float16) - -# Reuse components from t2i pipeline using pattern matching -reuse_components = components.search_components("text_encoder_2|scheduler|vae|tokenizer_2") -refiner_pipe.update_components(**reuse_components) -``` - -When we reuse components from the "t2i" collection, they automatically get added to the "refiner" collection as well. You can verify this by checking the Components Manager - you'll see components like `vae`, `scheduler`, etc. listed under both collections, indicating they're shared between workflows. - -Now we can refine any of our generated latents: - -```py -# Refine all our different latents -refined_latents = refiner_pipe(image_latents=latents_t2i, prompt=prompt, num_inference_steps=10, output="latents") -refined_image = decoder_node(latents=refined_latents, output="images")[0] -refined_image.save("modular_part2_t2i_refine_out.png") - -refined_latents = refiner_pipe(image_latents=latents_lora, prompt=prompt, num_inference_steps=10, output="latents") -refined_image = decoder_node(latents=refined_latents, output="images")[0] -refined_image.save("modular_part2_lora_refine_out.png") -``` - - -Here are the results from our modular pipeline examples. - -#### Base Text-to-Image Generation -| Base Text-to-Image | Base Text-to-Image (Refined) | -|-------------------|------------------------------| -| ![Base T2I](https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/modular_quicktour/modular_part2_t2i.png) | ![Base T2I Refined](https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/modular_quicktour/modular_part2_t2i_refine_out.png) | - -#### LoRA -| LoRA | LoRA (Refined) | -|-------------------|------------------------------| -| ![LoRA](https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/modular_quicktour/modular_part2_lora.png) | ![LoRA Refined](https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/modular_quicktour/modular_part2_lora_refine_out.png) | +All models begin on the CPU and [`ComponentsManager`] moves them to the appropriate device right before they're needed, and moves other models back to the CPU when GPU memory is low. 
+You can set your own rules for which models to offload first.
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
index b3541267685d..1d819baabf0a 100644
--- a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.

# LoopSequentialPipelineBlocks

-[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlocks`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop which is iterative by default.
+[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlock`]s together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop, which is iterative by default.

This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].

@@ -80,13 +80,13 @@ class LoopBlock(PipelineBlock):

## LoopSequentialPipelineBlocks

-Use the [~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`] method to add the loop block to the loop wrapper to create [~modular_pipelines.LoopSequentialPipelineBlocks`].
+Use the [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`] method to add the loop block to the loop wrapper to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].

```py
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock})
```

-Add more loop blocks to run within each iteration with [~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`]. This allows you to modify the blocks without changing the loop logic itself.
+Add more loop blocks to run within each iteration with [`~modular_pipelines.LoopSequentialPipelineBlocks.from_blocks_dict`]. This allows you to modify the blocks without changing the loop logic itself.

```py
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock(), "block2": LoopBlock})
diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md
index bb9d067d02d4..e42bf9497041 100644
--- a/docs/source/en/modular_diffusers/modular_diffusers_states.md
+++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md
@@ -72,7 +72,7 @@ def __call__(self, components, state):

## State interaction

-[`~modular_pipelines.PipelineState`] and [`BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+[`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.

- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`~modular_pipelines.BlockState`] and won't affect the original image in [`~modular_pipelines.PipelineState`].
- `intermediate_inputs` are often values created by a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`~modular_pipelines.BlockState`] and [`~modular_pipelines.PipelineState`].
Any subsequent blocks are also affected.
diff --git a/docs/source/en/modular_diffusers/modular_pipeline.md b/docs/source/en/modular_diffusers/modular_pipeline.md
index fcc7dbebb530..ee2a659985cc 100644
--- a/docs/source/en/modular_diffusers/modular_pipeline.md
+++ b/docs/source/en/modular_diffusers/modular_pipeline.md
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.

# ModularPipeline

-[`ModularPipeline`] converts [`PipelineBlock`]'s into an executable pipeline that loads models and performs the computation steps defined in a block. It is the main interface for users to run a pipeline and it is very similar to the [`DiffusionPipeline`] API.
+[`ModularPipeline`] converts [`~modular_pipelines.PipelineBlock`]s into an executable pipeline that loads models and performs the computation steps defined in a block. It is the main interface for users to run a pipeline and it is very similar to the [`DiffusionPipeline`] API.

The main difference is that you include an `output` argument in the pipeline call to specify what the pipeline should return.

@@ -92,6 +92,37 @@ image.save("moduar_inpaint_out.png")

This guide will show you how to create a [`ModularPipeline`] and manage the components in it.

+## Adding blocks
+
+Block presets and the `sub_blocks` attribute are [`InsertableDict`] objects, so blocks can be inserted at specific positions, providing a flexible way to mix-and-match blocks.
+
+Use [`~modular_pipelines.modular_pipeline_utils.InsertableDict.insert`] on either the block class or `sub_blocks` attribute to add a block.
+
+```py
+# BLOCKS is a dict of block classes, so add the class itself
+BLOCKS.insert("block_name", BlockClass, index)
+# the sub_blocks attribute contains instances, so add a block instance
+t2i_blocks.sub_blocks.insert("block_name", block_instance, index)
+```
+
+Use [`~modular_pipelines.modular_pipeline_utils.InsertableDict.pop`] on either the block class or `sub_blocks` attribute to remove a block.
+
+```py
+# remove a block class from the preset
+BLOCKS.pop("text_encoder")
+# split out a block instance on its own
+text_encoder_block = t2i_blocks.sub_blocks.pop("text_encoder")
+```
+
+Swap blocks by setting the existing block to the new block.
+
+```py
+# Replace a block class in the preset
+BLOCKS["prepare_latents"] = CustomPrepareLatents
+# Replace in the sub_blocks attribute using a block instance
+t2i_blocks.sub_blocks["prepare_latents"] = CustomPrepareLatents()
+```
+
## Creating a pipeline

There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeline from [`PipelineBlocks`] or load an existing pipeline with [`~ModularPipeline.from_pretrained`].

@@ -104,7 +135,7 @@ You should also initialize a [`ComponentsManager`] to handle device placement an

-Use the [`ModularPipelineBlocks.init_pipeline`] method to create a [`ModularPipeline`] from the component and configuration specifications. This method loads the *specifications* from a `modular_model_index.json` file, but it doesn't load the *models* yet.
+Use the [`~ModularPipelineBlocks.init_pipeline`] method to create a [`ModularPipeline`] from the component and configuration specifications. This method loads the *specifications* from a `modular_model_index.json` file, but it doesn't load the *models* yet.
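+
+As a rough sketch of this two-step flow (assuming the `t2i_blocks` preset from the examples above and the demo modular repository; the full example follows):
+
+```py
+t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-loader-t2i-0704")
+# specifications are registered, but no model weights are in memory yet
+print(t2i_pipeline.null_component_names)
+t2i_pipeline.load_default_components()
+```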
```py
from diffusers import ComponentsManager
diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md
index 6f8e00f8a4dd..085318d550f3 100644
--- a/docs/source/en/modular_diffusers/overview.md
+++ b/docs/source/en/modular_diffusers/overview.md
@@ -35,6 +35,6 @@ The Modular Diffusers docs are organized as shown below.

- [States](./modular_diffusers_states) explains how data is shared and communicated between pipeline blocks and [`ModularPipeline`].
- [PipelineBlock](./pipeline_block) is the most basic unit of a [`ModularPipeline`] and this guide shows you how to create one.
-- [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`SequentialPipelineBlocks`] and how they connect and work together.
-- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`LoopSequentialPipelineBlocks`].
-- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`AutoPipelineBlocks`]. \ No newline at end of file
+- [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`~modular_pipelines.SequentialPipelineBlocks`] and how they connect and work together.
+- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
+- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`~modular_pipelines.AutoPipelineBlocks`]. \ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
index 95bf13adfb57..1381377e21ea 100644
--- a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
@@ -12,11 +12,11 @@ specific language governing permissions and limitations under the License.

# SequentialPipelineBlocks

-[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline.
+[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlock`]s together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline.

This guide shows you how to connect two blocks into a [`~modular_pipelines.SequentialPipelineBlocks`].

-Create two [`~modular_pipelines.PipelineBlocks`].
The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`.
+Create two [`~modular_pipelines.PipelineBlock`]s. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock`, uses `batch_size` as `intermediate_inputs`.

From 5ee815b251c10ea824f45e8d14e420260ccb110c Mon Sep 17 00:00:00 2001
From: stevhliu
Date: Wed, 30 Jul 2025 12:03:12 -0700
Subject: [PATCH 12/15] reviews

---
 docs/source/en/_toctree.yml                   |   4 +-
 .../modular_diffusers/auto_pipeline_blocks.md |   5 +-
 .../modular_diffusers/components_manager.md   |  10 +-
 .../en/modular_diffusers/end_to_end_guide.md  | 417 ------------------
 docs/source/en/modular_diffusers/guiders.md   |   8 +-
 .../loop_sequential_pipeline_blocks.md        |   8 +-
 .../modular_diffusers_states.md               |  10 +-
 .../en/modular_diffusers/modular_pipeline.md  |  20 +-
 docs/source/en/modular_diffusers/overview.md  |  21 +-
 .../en/modular_diffusers/pipeline_block.md    |  16 +-
 10 files changed, 51 insertions(+), 468 deletions(-)
 delete mode 100644 docs/source/en/modular_diffusers/end_to_end_guide.md

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 37b4bc9b05d5..68975d8029a8 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -114,10 +114,8 @@
         title: Overview
       - local: modular_diffusers/quickstart
         title: Quickstart
-      - local: modular_diffusers/end_to_end_guide
-        title: End-to-end example
       - local: modular_diffusers/modular_diffusers_states
-        title: Block states
+        title: States
       - local: modular_diffusers/pipeline_block
         title: PipelineBlock
       - local: modular_diffusers/sequential_pipeline_blocks
diff --git a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
index fec5473be3d8..970e389dfac7 100644
--- a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md
@@ -155,14 +155,13 @@ class AutoImageBlocks(AutoPipelineBlocks):
 It is **very** important to include a `description` to avoid any confusion over how to run a block and what inputs are required. While [`~modular_pipelines.AutoPipelineBlocks`] are convenient, its conditional logic may be difficult to figure out if it isn't properly explained.

-Create an instance of `AutoImageBlocks` and use [`~modular_pipelines.ModularPipelineBlocks.init_pipeline`] to convert it to a pipeline.
+Create an instance of `AutoImageBlocks`.

```py
auto_blocks = AutoImageBlocks()
-auto_pipeline = auto_blocks.init_pipeline()
```

-For more complex compositions, nested [`~modular_pipelines.AutoPipelineBlocks`] blocks when they're used as sub-blocks in larger pipelines, use the [`~modular_pipelines.SequentialPipelineBlocks.get_execution_blocks`] method to extract the block that is actually run based on your input.
+For more complex compositions, such as nested [`~modular_pipelines.AutoPipelineBlocks`] blocks when they're used as sub-blocks in larger pipelines, use the [`~modular_pipelines.SequentialPipelineBlocks.get_execution_blocks`] method to extract the block that is actually run based on your input.
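+
+For instance, a small sketch of extracting the block for a different trigger input (assuming `image` is the img2img trigger defined for `AutoImageBlocks` above; the `mask` case follows below):
+
+```py
+# returns only the sub-block that would run for an image-to-image input
+auto_blocks.get_execution_blocks("image")
+```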
```py
auto_blocks.get_execution_blocks("mask")
diff --git a/docs/source/en/modular_diffusers/components_manager.md b/docs/source/en/modular_diffusers/components_manager.md
index 565938b39443..50fa14072460 100644
--- a/docs/source/en/modular_diffusers/components_manager.md
+++ b/docs/source/en/modular_diffusers/components_manager.md
@@ -51,7 +51,7 @@ t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=comp

-Components are only loaded and registered when using [`~ModularPipeline.load_components`] or [`~ModularPipeline.load_default_components`]. The example below uses [`~ModularPipeline.load_default_components`] to create a second pipeline that reuses all the components from the first one, and assign it to a different collection
+Components are only loaded and registered when using [`~ModularPipeline.load_components`] or [`~ModularPipeline.load_default_components`]. The example below uses [`~ModularPipeline.load_default_components`] to create a second pipeline that reuses all the components from the first one, and assigns it to a different collection.

```py
pipe.load_default_components()
@@ -68,7 +68,7 @@ comp_dict = comp.get_components_by_names(names=pipe2.null_component_names)
 pipe2.update_components(**comp_dict)
 ```

-To add individual components, use the [`~ComponentsManager.add`] method. This registers a component with a unique identifier.
+To add individual components, use the [`~ComponentsManager.add`] method. This registers a component with a unique id.

```py
from diffusers import AutoModel
@@ -107,7 +107,7 @@ comp.get_one(name="unet", collection="sdxl")

### get_components_by_names

-The [`~ComponentsManager.get_components_by_names`] method accepts a list of names and returns a dictionary mapping names to components. This is especially useful with a [`ModularPipeline`] since they provide lists of required component names and the returned dictionary can be passed directly to [`~ModularPipeline.update_components`].
+The [`~ComponentsManager.get_components_by_names`] method accepts a list of names and returns a dictionary mapping names to components. This is especially useful with [`ModularPipeline`] since it provides a list of required component names, and the returned dictionary can be passed directly to [`~ModularPipeline.update_components`].

@@ -153,7 +153,7 @@ comp.add("text_encoder", text_encoder_2)

## Collections

-Collections are labels you can assign to components for better organization and management. Add a component to a collection with the `collection` argument in [`~ComponentsManager.add`].
+Collections are labels assigned to components for better organization and management. Add a component to a collection with the `collection` argument in [`~ComponentsManager.add`].

Only one component per name is allowed in each collection. Adding a second component with the same name automatically removes the first component.
diff --git a/docs/source/en/modular_diffusers/end_to_end_guide.md b/docs/source/en/modular_diffusers/end_to_end_guide.md deleted file mode 100644 index 5aea48004444..000000000000 --- a/docs/source/en/modular_diffusers/end_to_end_guide.md +++ /dev/null @@ -1,417 +0,0 @@ - - -# Quickstart - -Modular Diffusers is designed to help developers quickly create new workflows within a modular framework by assembling blocks into a pipeline. The blocks are highly flexible and can be reused or mixed-and-matched in many ways. You only need to create a new block if it doesn't already. - -This doc will show you how to implement the [Differential Diffusion](https://differential-diffusion.github.io/) pipeline with the modular framework in 4 steps. - -1. Identify an existing pipeline that is similar to what you're trying to implement and determine what part of the pipeline needs to be modified. -2. Build out the pipeline using existing blocks wherever possible and creating placeholders for the new blocks. -3. Test the pipeline with expected inputs and outputs. -4. Add custom logic to the blocks you want to change, and test incrementally to make sure it works. - -## Identify required blocks - -[Differential Diffusion](https://differential-diffusion.github.io/) is an image-to-image workflow that uses a map to determine the amount of change in each image region. Regions with lower pixel values are replaced with the original noised latents to preserve more of the original image. It is unlike a standard image-to-image worfklow which applies uniform noise to all pixels based on the `strength` parameter. - -This means you should start with the preset blocks for an image-to-image pipeline. - -```py -from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS - -IMAGE2IMAGE_BLOCKS = InsertableDict([ - ("text_encoder", StableDiffusionXLTextEncoderStep), - ("image_encoder", StableDiffusionXLVaeEncoderStep), - ("input", StableDiffusionXLInputStep), - ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep), - ("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep), - ("prepare_add_cond", StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep), - ("denoise", StableDiffusionXLDenoiseStep), - ("decode", StableDiffusionXLDecodeStep) -]) -``` - -The blocks that need to be modified are: - -- The `prepare_latents` block to create the change map and pre-compute noised latents for all timesteps. -- The `denoise` block to selectively apply denoising based on the change map. This block is a `LoopSequentialPipelineBlocks` which means it contains other blocks in it that it executes in a loop. - - ```py - denoise_blocks = IMAGE2IMAGE_BLOCKS["denoise"]() - print(denoise_blocks) - ``` - - Print this out to view the docs for it. You'll see that it contains 3 sub-blocks, `before_denoiser`, `denoiser`, and `after_denoiser`. The `before_denoiser` sub-block should be modified to prepare the latent input for the denoiser based on the change map. - -All of the other blocks can be reused from the existing image-to-image and text-to-image workflows. You only need to change the `prepare_latents` and `denoise` blocks. - -## Build the pipeline - -Once you've identified which blocks you need, create the pipeline. You don't need to modify the blocks yet. Create a placeholder block by copying the existing ones. - -```py -class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): - """Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later""" - # ... 
same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep -``` - -Assemble the denoise block for Differential Diffusion. Replace the `StableDiffusionXLLoopBeforeDenoiser` with `SDXLDiffDiffLoopBeforeDenoiser` because that is the only sub-block that needs modifying. - -```py -class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): - block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLLoopDenoiser, StableDiffusionXLLoopAfterDenoiser] - block_names = ["before_denoiser", "denoiser", "after_denoiser"] -``` - -Now put the whole pipeline together. Copy the existing blocks and set the `prepare_latents` and `denoise` blocks to the ones you just created. - -> [!TIP] -> The `set_timesteps` block is reused from the text-to-image workflow because Differential Diffusion doesn't use the `strength` parameter. - -```py -DIFFDIFF_BLOCKS = IMAGE2IMAGE_BLOCKS.copy() -DIFFDIFF_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"] -DIFFDIFF_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep -DIFFDIFF_BLOCKS["denoise"] = SDXLDiffDiffDenoiseStep - -dd_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_BLOCKS) -print(dd_blocks) -``` - -## Test example - -The pipeline so far is a collection of blocks that specify how it should work. To add components like a UNet, call `init_pipeline` to initialize a [`ModularPipeline`] with the expected component specifications. Then call [`~ModularPipeline.load_default_components`] to load the model components into the pipeline. - -```py -dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") -dd_pipeline.load_default_componenets(torch_dtype=torch.float16) -dd_pipeline.to("cuda") -``` - -Test the pipeline with a basic example. The [`ModularPipeline`] is nearly identical to a standard [`DiffusionPipeline`] except you should specify the expected output type to `"images"`. - -```py -image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") -mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") - -prompt = "a green pear" -negative_prompt = "blurry" - -image = dd_pipeline( - prompt=prompt, - negative_prompt=negative_prompt, - num_inference_steps=25, - diffdiff_map=mask, - image=image, - output="images" -)[0] - -image.save("diffdiff_out.png") -``` - -At this point, the [`ModularPipeline`] behaves exactly like a standard image-to-image pipeline. - -## Implement custom logic - -Now that the pipeline works, start implementing the required custom logic for Differential Diffusion. - -### prepare_latents - -> [!TIP] -> Use `print(dd_pipeline.doc)` to check the inputs and outputs of the pipeline and add `print(state)` and `print(block_state)` statements inside the `__call__` method to inspect the intermediate results. -> This helps understand what is happening at each stage of the pipeline and is useful for debugging. - -The `prepare_latents` block requires a processor to process the change map. 
- -```diff -class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): - @property - def expected_components(self) -> List[ComponentSpec]: - return [ - ComponentSpec("vae", AutoencoderKL), - ComponentSpec("scheduler", EulerDiscreteScheduler), -+ ComponentSpec("mask_processor", VaeImageProcessor, config=FrozenDict({"do_normalize": False, "do_convert_grayscale": True})) - ] -``` - -The `prepare_latents` block requires two new inputs: - -- a change map (`diffdiff_map`) -- a `timestep` for precomputing all the latents and `num_inference_steps` to create the mask for updating the image regions - -```diff - @property - def inputs(self) -> List[Tuple[str, Any]]: - return [ -+ InputParam("diffdiff_map", required=True), - ] - - @property - def intermediate_inputs(self) -> List[InputParam]: - return [ - InputParam("generator"), -- InputParam("latent_timestep", required=True, type_hint=torch.Tensor), -+ InputParam("timesteps", type_hint=torch.Tensor), -+ InputParam("num_inference_steps", type_hint=int), - ] - - @property - def intermediate_outputs(self) -> List[OutputParam]: - return [ -+ OutputParam("original_latents", type_hint=torch.Tensor), -+ OutputParam("diffdiff_masks", type_hint=torch.Tensor), - ] -``` - -The `prepare_latents` block should output the `diffdiff_masks` and `original_latents`. Add the custom logic in the `__call__` method. - -```diff - def __call__(self, components, state: PipelineState): - # ... existing logic ... -+ # Process change map and create masks -+ diffdiff_map = components.mask_processor.preprocess(block_state.diffdiff_map, height=latent_height, width=latent_width) -+ thresholds = torch.arange(block_state.num_inference_steps, dtype=diffdiff_map.dtype) / block_state.num_inference_steps -+ block_state.diffdiff_masks = diffdiff_map > (thresholds + (block_state.denoising_start or 0)) -+ block_state.original_latents = block_state.latents -``` - -### before_denoiser - -The `before_denoiser` sub-block requires three new inputs: - -- a `denoising_start` parameter -- the `diffdiff_masks` and `original_latents` outputs from `prepare_latents` are now inputs to `before_denoiser` - -```diff -class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): - @property - def description(self) -> str: - return ( - "Step within the denoising loop for differential diffusion that prepare the latent input for the denoiser" - ) - -+ @property -+ def inputs(self) -> List[Tuple[str, Any]]: -+ return [ -+ InputParam("denoising_start"), -+ ] - - @property - def intermediate_inputs(self) -> List[str]: - return [ - InputParam("latents", required=True, type_hint=torch.Tensor), -+ InputParam("original_latents", type_hint=torch.Tensor), -+ InputParam("diffdiff_masks", type_hint=torch.Tensor), - ] -``` - -Add the custom logic in the `__call__` method. - -```diff - def __call__(self, components, block_state, i, t): -+ # Apply differential diffusion logic -+ if i == 0 and block_state.denoising_start is None: -+ block_state.latents = block_state.original_latents[:1] -+ else: -+ block_state.mask = block_state.diffdiff_masks[i].unsqueeze(0).unsqueeze(1) -+ block_state.latents = block_state.original_latents[i] * block_state.mask + block_state.latents * (1 - block_state.mask) - - # ... rest of existing logic ... -``` - -Run the pipeline again and you should get the image below. 
- -```py -image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") -mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") - -prompt = "a green pear" -negative_prompt = "blurry" - -image = dd_pipeline( - prompt=prompt, - negative_prompt=negative_prompt, - num_inference_steps=25, - diffdiff_map=mask, - image=image, - output="images" -)[0] - -image.save("diffdiff_out.png") -``` - -
- -
- -## Adding blocks - -You can add blocks to a [`ModularPipeline`] to support additional features without rewriting the pipeline from scratch. - -This section demonstrates how to add an IP-Adapter or ControlNet. - -### IP-Adapter - -Stable Diffusion XL already has a preset IP-Adapter block that you can use and doesn't require any changes to the existing Differential Diffusion pipeline. - -```py -from diffusers.modular_pipelines.stable_diffusion_xl.encoders import StableDiffusionXLAutoIPAdapterStep - -ip_adapter_block = StableDiffusionXLAutoIPAdapterStep() -``` - -Use the [`sub_blocks.insert`] method to insert it into the [`ModularPipeline`]. The example below inserts the `ip_adapter_block` at position `0`. Print the pipeline to see that the `ip_adapter_block` is added and it requires an `ip_adapter_image`. This also added two components to the pipeline, the `image_encoder` and `feature_extractor`. - -```py -dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0) -``` - -Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_default_components`] to load the model components. Load and set the IP-Adapter to run the pipeline. - -```py -dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff") -dd_pipeline.load_default_components(torch_dtype=torch.float16) -dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin") -dd_pipeline.loader.set_ip_adapter_scale(0.6) -dd_pipeline = dd_pipeline.to(device) - -ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg") -image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") -mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") - -prompt = "a green pear" -negative_prompt = "blurry" -generator = torch.Generator(device=device).manual_seed(42) - -image = dd_pipeline( - prompt=prompt, - negative_prompt=negative_prompt, - num_inference_steps=25, - generator=generator, - ip_adapter_image=ip_adapter_image, - diffdiff_map=mask, - image=image, - output="images" -)[0] -``` - -### ControlNet - -Stable Diffusion XL already has a preset ControlNet block that can readily be used. - -```py -from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep - -control_input_block = StableDiffusionXLAutoControlNetInputStep() -``` - -However, it requires modifying the `denoise` block because that's where the ControlNet injects the control information into the UNet. - -Modify the `denoise` block by replacing the `StableDiffusionXLLoopDenoiser` sub-block with the `StableDiffusionXLControlNetLoopDenoiser`. - -```py -class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): - block_classes = [SDXLDiffDiffLoopBeforeDenoiser, StableDiffusionXLControlNetLoopDenoiser, StableDiffusionXLDenoiseLoopAfterDenoiser] - block_names = ["before_denoiser", "denoiser", "after_denoiser"] - -controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep() -``` - -Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and [`~ModularPipeline.load_default_components`] into it. 
- -```py
dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7)
dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block

dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
dd_pipeline.load_default_components(torch_dtype=torch.float16)
dd_pipeline = dd_pipeline.to(device)

control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg")
image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true")
mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true")

prompt = "a green pear"
negative_prompt = "blurry"
generator = torch.Generator(device=device).manual_seed(42)

image = dd_pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=25,
    generator=generator,
    control_image=control_image,
    controlnet_conditioning_scale=0.5,
    diffdiff_map=mask,
    image=image,
    output="images"
)[0]
```

### AutoPipelineBlocks and SequentialPipelineBlocks

It is possible to combine the `SDXLDiffDiffControlNetDenoiseStep` and `SDXLDiffDiffDenoiseStep` into an [`AutoPipelineBlocks`]. This is a collection of blocks that automatically determines which block to run based on the input.

For example, use `block_trigger_inputs` to only run the `SDXLDiffDiffControlNetDenoiseStep` block if a `control_image` input is provided. Otherwise, the `SDXLDiffDiffDenoiseStep` is used.

```py
class SDXLDiffDiffAutoDenoiseStep(AutoPipelineBlocks):
    block_classes = [SDXLDiffDiffControlNetDenoiseStep, SDXLDiffDiffDenoiseStep]
    block_names = ["controlnet_denoise", "denoise"]
    block_trigger_inputs = ["controlnet_cond", None]
```

This gives you the ability to package several workflows into a single [`ModularPipeline`]. Add the `ip_adapter` and `controlnet_input` blocks.

```py
DIFFDIFF_AUTO_BLOCKS = IMAGE2IMAGE_BLOCKS.copy()
DIFFDIFF_AUTO_BLOCKS["prepare_latents"] = SDXLDiffDiffPrepareLatentsStep
DIFFDIFF_AUTO_BLOCKS["set_timesteps"] = TEXT2IMAGE_BLOCKS["set_timesteps"]
DIFFDIFF_AUTO_BLOCKS["denoise"] = SDXLDiffDiffAutoDenoiseStep
DIFFDIFF_AUTO_BLOCKS.insert("ip_adapter", StableDiffusionXLAutoIPAdapterStep, 0)
DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input", StableDiffusionXLAutoControlNetInputStep, 7)
```

Call [`SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipelineBlocks`]. This is a sequence of blocks that are chained together and run one after the other.

Create a [`ModularPipeline`] and load in the model components to run.

```py
dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS)
dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
dd_pipeline.load_default_components(torch_dtype=torch.float16)
```

## Share

Add your [`ModularPipeline`] to the Hub with [`~ModularPipeline.save_pretrained`] by setting the `push_to_hub` argument to `True`.

```py
dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True)
```

Other users can load the [`ModularPipeline`] with [`~ModularPipeline.from_pretrained`].

It is a good idea to use the [`ComponentsManager`] with the pipeline to help you manage the many different components of a pipeline. Once you call [`~ModularPipeline.load_default_components`], the components are registered to the [`ComponentsManager`].
- -```py -import torch -from diffusers.utils import load_image -from diffusers.modular_pipelines import ModularPipeline, ComponentsManager - -components = ComponentsManager() - -diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff") -diffdiff_pipeline.load_default_components(torch_dtype=torch.float16) -``` \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/guiders.md b/docs/source/en/modular_diffusers/guiders.md index 2c7eddf5c7b8..ddf5eb703f1c 100644 --- a/docs/source/en/modular_diffusers/guiders.md +++ b/docs/source/en/modular_diffusers/guiders.md @@ -30,7 +30,7 @@ ComponentSpec(name='guider', type_hint= [!TIP] -> Changing guiders will return some text letting you know you're changing the guider type. +> Changing guiders will return text letting you know you're changing the guider type. > ```bash > ModularPipeline.update_components: adding guider with new type: PerturbedAttentionGuidance, previous type: ClassifierFreeGuidance > ``` @@ -148,10 +148,10 @@ Edit the `modular_model_index.json` file and add a loading specification for the
-Change the [`~ComponentSpec.default_creation_method`] to `"from_pretrained"` and use [`~ModularPipeline.update_components`] to update the guider and component specifications as well as the pipeline config.
+Change the [`~ComponentSpec.default_creation_method`] to `from_pretrained` and use [`~ModularPipeline.update_components`] to update the guider and component specifications as well as the pipeline config.
 
 > [!TIP]
-> Changing the creation method will return some text letting you know you're changing the creation type to `from_pretrained`.
+> Changing the creation method will return text letting you know you're changing the creation type to `from_pretrained`.
 > ```bash
 > ModularPipeline.update_components: changing the default_creation_method of guider from from_config to from_pretrained.
 > ```
@@ -165,7 +165,7 @@ pag_guider = guider_spec.load()
 t2i_pipeline.update_components(guider=pag_guider)
 ```
 
-To make it the default guider for a pipeline, call [`~utils.PushToHubMixin.push_to_hub`]. This is an optional step and not necessary if you only want to experiment locally.
+To make it the default guider for a pipeline, call [`~utils.PushToHubMixin.push_to_hub`]. This is an optional step and not necessary if you are only experimenting locally.
 
 ```py
 t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider")
diff --git a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
index 1d819baabf0a..250a7cd23656 100644
--- a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md
@@ -18,7 +18,7 @@ This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBl
 
 ## Loop wrapper
 
-[`~modular_pipelines.LoopSequentialPipelineBlocks`], is also known as the *loop wrapper* because defines the loop structure, iteration variables, and configuration. Within the loop wrapper, define the following variables.
+[`~modular_pipelines.LoopSequentialPipelineBlocks`] is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, you need the following variables.
 
 - `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.PipelineBlock.inputs`].
 - `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.PipelineBlock.intermediate_inputs`].
@@ -54,9 +54,9 @@ The loop wrapper can pass additional arguments, like current iteration index, to
 
 A loop block is a [`~modular_pipelines.PipelineBlock`], but the `__call__` method behaves differently.
 
-- The `__call__` method recieves the iteration variable from the loop wrapper.
-- The `__call__` method works directly with the [`~modular_pipelines.BlockState`] instead of the [`~modular_pipelines.PipelineState`].
-- The `__call__` method doesn't require retrieving or updating the [`~modular_pipelines.BlockState`].
+- It receives the iteration variable from the loop wrapper.
+- It works directly with the [`~modular_pipelines.BlockState`] instead of the [`~modular_pipelines.PipelineState`].
+- It doesn't require retrieving or updating the [`~modular_pipelines.BlockState`].
 
 Loop blocks share the same [`~modular_pipelines.BlockState`] to allow values to accumulate and change for each iteration in the loop.
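To see how the wrapper and loop block fit together, here is a hedged sketch. It assumes the `LoopWrapper` and `LoopBlock` classes outlined in this guide, and that `from_blocks_dict` works on the loop wrapper the same way it does on [`~modular_pipelines.SequentialPipelineBlocks`]; the `num_steps` and `x` parameter names are illustrative.

```py
# A sketch of composing and running a loop, under the assumptions stated above.
loop = LoopWrapper.from_blocks_dict({"block1": LoopBlock})

# Convert the composed loop into a runnable pipeline.
pipeline = loop.init_pipeline()

# Run the loop; `output="x"` asks for the intermediate value the loop block
# updated on every iteration.
x = pipeline(num_steps=10, x=0, output="x")
```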
diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md
index e42bf9497041..093a4b53b7b2 100644
--- a/docs/source/en/modular_diffusers/modular_diffusers_states.md
+++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md
@@ -10,7 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->
 
-# Block states
+# States
 
 Blocks rely on the [`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] data structures for communicating and sharing data.
 
@@ -23,7 +23,7 @@ This guide explains how states work and how they connect blocks.
 
 ## PipelineState
 
-The [`~modular_pipelines.PipelineState`] is a global state container for all pipeline blocks. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data.
+The [`~modular_pipelines.PipelineState`] is a global state container for all blocks. It maintains the complete runtime state of the pipeline and provides a structured way for blocks to read from and write to shared data.
 
 There are two dict's in [`~modular_pipelines.PipelineState`] for structuring data.
 
@@ -46,9 +46,9 @@ PipelineState(
 
 ## BlockState
 
-The [`~modular_pipelines.BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, that an individual pipeline block needs from [`~modular_pipelines.PipelineState`] for performing it's computations.
+The [`~modular_pipelines.BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, an individual block needs from [`~modular_pipelines.PipelineState`] for performing its computations.
 
-You can access these variables directly as attributes like `block_state.image`.
+Access these variables directly as attributes like `block_state.image`.
 
 ```py
 BlockState(
@@ -74,7 +74,7 @@ def __call__(self, components, state):
 
 [`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
 
-- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`~modular_pipelines.BlockState`] and won't affect the original image in [`~modular_pipelines.PipelineState`].
+- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`~modular_pipelines.BlockState`] and won't affect the original input in [`~modular_pipelines.PipelineState`].
 
 - `intermediate_inputs`, is often values created from a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`~modular_pipelines.BlockState`] and [`~modular_pipelines.PipelineState`]. Any subsequent blocks are also affected.
 
   If a previous block doesn't provide an `intermediate_inputs`, then the pipeline makes it available as a user input. However, the value is still a mutable intermediate state.
 
diff --git a/docs/source/en/modular_diffusers/modular_pipeline.md b/docs/source/en/modular_diffusers/modular_pipeline.md
index ee2a659985cc..56225490c0c3 100644
--- a/docs/source/en/modular_diffusers/modular_pipeline.md
+++ b/docs/source/en/modular_diffusers/modular_pipeline.md
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
# ModularPipeline -[`ModularPipeline`] converts [`~modular_pipelines.PipelineBlock`]'s into an executable pipeline that loads models and performs the computation steps defined in a block. It is the main interface for users to run a pipeline and it is very similar to the [`DiffusionPipeline`] API. +[`ModularPipeline`] converts [`~modular_pipelines.PipelineBlock`]'s into an executable pipeline that loads models and performs the computation steps defined in the block. It is the main interface for running a pipeline and it is very similar to the [`DiffusionPipeline`] API. The main difference is to include an expected `output` argument in the pipeline. @@ -130,7 +130,7 @@ There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeli You should also initialize a [`ComponentsManager`] to handle device placement and memory and component management. > [!TIP] -> Refer to the [ComponentsManager](./components_manager) for more details about how it can help manage components across different workflows. +> Refer to the [ComponentsManager](./components_manager) doc for more details about how it can help manage components across different workflows. @@ -176,7 +176,7 @@ diffdiff_pipeline = ModularPipeline.from_pretrained(modular_repo_id, trust_remot ## Loading components -A [`ModularPipeline`] doesn't automatically instantiated with components. It only loads the configuration and component specifications. You can load all components with [`~ModularPipeline.load_default_components`] or only load specific components with [`~ModularPipeline.load_components`]. +A [`ModularPipeline`] doesn't automatically instantiate with components. It only loads the configuration and component specifications. You can load all components with [`~ModularPipeline.load_default_components`] or only load specific components with [`~ModularPipeline.load_components`]. @@ -208,9 +208,9 @@ Print the pipeline to inspect the loaded pretrained components. t2i_pipeline ``` -This should match the `modular_model_index.json` file from the modular repository a pipeline is initialized from. It shows the loading specifications that match the pipeline's component requirements. If a pipeline doesn't need a component, that component won't be included even if it exists in the modular repository. +This should match the `modular_model_index.json` file from the modular repository a pipeline is initialized from. If a pipeline doesn't need a component, it won't be included even if it exists in the modular repository. -To modify where components are loaded, edit the `modular_model_index.json` file in the repository and change it to your desired loading path. The example below loads the UNet from a different repository. +To modify where components are loaded from, edit the `modular_model_index.json` file in the repository and change it to your desired loading path. The example below loads a UNet from a different repository. ```json # original @@ -259,7 +259,7 @@ t2i_pipeline.pretrained_component_names ['text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'scheduler', 'unet', 'vae'] ``` -Use `config_component_names` to return components that are created with the default config (not loaded from a modular repository). Components from a config aren't included because they already initialized during pipeline creation. This is why they aren't listed in `null_component_names`. +Use `config_component_names` to return components that are created with the default config (not loaded from a modular repository). 
Components from a config aren't included because they are already initialized during pipeline creation. This is why they aren't listed in `null_component_names`.
 
 ```py
 t2i_pipeline.config_component_names
@@ -273,9 +273,11 @@ Components may be updated depending on whether it is a *pretrained component* or
 
 > [!WARNING]
 > A component may change from pretrained to config when updating a component. The component type is initially defined in a block's `expected_components` field.
 
-A pretrained component requires updating with [`ComponentSpec`] whereas a config component can be updated by eihter passing the object directly or with [`ComponentSpec`]. The [`ComponentSpec`] shows `default_creation_method="from_pretrained"` for a pretrained component.
+A pretrained component is updated with [`ComponentSpec`] whereas a config component is updated by either passing the object directly or with [`ComponentSpec`].
 
-To update a pretrained component, create a [`ComponentSpec`] with the name of the component and where to load it from. Use the [`~ComponentSpec.load`] method to load the component. The [`ComponentSpec`] shows `default_creation_method="from_config` for a config component.
+The [`ComponentSpec`] shows `default_creation_method="from_pretrained"` for a pretrained component and `default_creation_method="from_config"` for a config component.
+
+To update a pretrained component, create a [`ComponentSpec`] with the name of the component and where to load it from. Use the [`~ComponentSpec.load`] method to load the component.
 
 ```py
 from diffusers import ComponentSpec, UNet2DConditionModel
@@ -294,7 +296,7 @@ When a component is updated, the loading specifications are also updated in the
 
 ### Component extraction and modification
 
-When you use [`ComponentSpec.load`], the new component maintains its loading specifications. This makes it possible to extract the specification and recreate the component.
+When you use [`~ComponentSpec.load`], the new component maintains its loading specifications. This makes it possible to extract the specification and recreate the component.
 
 ```py
 spec = ComponentSpec.from_component("unet", unet2)
diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md
index 085318d550f3..d3b9a7f315aa 100644
--- a/docs/source/en/modular_diffusers/overview.md
+++ b/docs/source/en/modular_diffusers/overview.md
@@ -13,28 +13,29 @@ specific language governing permissions and limitations under the License.
 # Overview
 
 > [!WARNING]
-> ⚠︎ Modular Diffusers is still under active development and it's API may change.
+> Modular Diffusers is under active development and its API may change.
 
 Modular Diffusers is a unified pipeline system that simplifies your workflow with *pipeline blocks*.
 
-- Blocks are reusable and you only create new blocks that are unique to your pipeline.
+- Blocks are reusable and you only need to create new blocks that are unique to your pipeline.
 - Blocks can be mixed and matched to adapt to or create a pipeline for a specific workflow or multiple workflows.
 
 The Modular Diffusers docs are organized as shown below.
 
 ## Quickstart
 
-- A [quickstart](./quickstart) start for implementing an example workflow with Modular Diffusers.
-
-## ModularPipeline
-
-- [ModularPipeline](./modular_pipeline) shows you how to build with pipeline blocks and convert them into an executable [`ModularPipeline`].
-- [ComponentsManager](./components_manager) shows you how to manage and reuse components across multiple pipelines.
+- A [quickstart](./quickstart) demonstrating how to implement an example workflow with Modular Diffusers.
 
 ## ModularPipelineBlocks
 
-- [States](./modular_diffusers_states) explains how data is shared and communicated between pipeline blocks and [`ModularPipeline`].
+- [States](./modular_diffusers_states) explains how data is shared and communicated between blocks and [`ModularPipeline`].
 - [PipelineBlock](./pipeline_block) is the most basic unit of a [`ModularPipeline`] and this guide shows you how to create one.
-- [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`SequentialPipelineBlocks`] and how they connect and work together.
-- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`LoopSequentialPipelineBlocks`].
-- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`AutoPipelineBlocks`]. \ No newline at end of file
+- [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`~modular_pipelines.SequentialPipelineBlocks`] and how they connect and work together.
+- [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
+- [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`~modular_pipelines.AutoPipelineBlocks`].
+
+## ModularPipeline
+
+- [ModularPipeline](./modular_pipeline) shows you how to create and convert pipeline blocks into an executable [`ModularPipeline`].
+- [ComponentsManager](./components_manager) shows you how to manage and reuse components across multiple pipelines.
+- [Guiders](./guiders) shows you how to use different guidance methods in the pipeline. \ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/pipeline_block.md b/docs/source/en/modular_diffusers/pipeline_block.md
index fd553cee9454..a96613bf20e1 100644
--- a/docs/source/en/modular_diffusers/pipeline_block.md
+++ b/docs/source/en/modular_diffusers/pipeline_block.md
@@ -14,18 +14,18 @@ specific language governing permissions and limitations under the License.
 
 [`~modular_pipelines.PipelineBlock`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`~modular_pipelines.PipelineBlock`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
 
-A [`~modular_pipelines.PipelineBlock`] on it's own can't be run to generate anything. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`~modular_pipelines.PipelineBlock`] needs to be converted into a [`ModularPipeline`].
+A [`~modular_pipelines.PipelineBlock`] on its own can't be executed. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`~modular_pipelines.PipelineBlock`] needs to be converted into a [`ModularPipeline`].
 
 This guide will show you how to create a [`~modular_pipelines.PipelineBlock`].
 
 ## Inputs and outputs
 
 > [!TIP]
-> Refer to the [Block states](./modular_diffusers_states) guide if you aren't familiar with how state works in Modular Diffusers.
+> Refer to the [States](./modular_diffusers_states) guide if you aren't familiar with how state works in Modular Diffusers.
A [`~modular_pipelines.PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
 
-- `inputs` are values provided by a user and they are retrieved from the [`~modular_pipelines.PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`~modular_pipelines.PipelineState`] maintains the original image.
+- `inputs` are values provided by a user and retrieved from the [`~modular_pipelines.PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`~modular_pipelines.PipelineState`] maintains the original image.
 
 Use `InputParam` to define `inputs`.
 
@@ -37,7 +37,7 @@ A [`~modular_pipelines.PipelineBlock`] requires `inputs`, `intermediate_inputs`,
 ]
 ```
 
-- `intermediate_inputs` are values are typically created from a previous block but it can also be directly provided if no preceding block generates them. Unlike `inputs`, `intermediate_inputs` can be modified.
+- `intermediate_inputs` are values typically created from a previous block but they can also be directly provided if no preceding block generates them. Unlike `inputs`, `intermediate_inputs` can be modified.
 
 Use `InputParam` to define `intermediate_inputs`.
 
@@ -59,13 +59,13 @@ A [`~modular_pipelines.PipelineBlock`] requires `inputs`, `intermediate_inputs`,
 ]
 ```
 
-The intermediate inputs and outputs work together to connect blocks by sharing data between them. They are accessible at any point, allowing you to track the workflow's progress.
+The intermediate inputs and outputs share data to connect blocks. They are accessible at any point, allowing you to track the workflow's progress.
 
 ## Computation logic
 
-The computation a block performs is defined in the `__call__` method which follows a specific structure.
+The computation a block performs is defined in the `__call__` method and it follows a specific structure.
 
-1. Retrieve the [`~modular_pipelines.BlockState`] to get a local view of the `inputs` and `intermediate_inputs` it needs from [`~modular_pipelines.PipelineState`].
+1. Retrieve the [`~modular_pipelines.BlockState`] to get a local view of the `inputs` and `intermediate_inputs`.
 2. Implement the computation logic on the `inputs` and `intermediate_inputs`.
 3. Update [`~modular_pipelines.PipelineState`] to push changes from the local [`~modular_pipelines.BlockState`] back to the global [`~modular_pipelines.PipelineState`].
 4. Return the components and state which becomes available to the next block.
 
@@ -84,7 +84,7 @@ def __call__(self, components, state):
 return components, state
 ```
 
-### Components and Configs
+### Components and configs
 
 The components and pipeline-level configs a block needs are specified in [`ComponentSpec`] and [`~modular_pipelines.ConfigSpec`].
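Putting these pieces together, a complete block looks roughly like the sketch below. The class name, parameter names, and the resizing logic are illustrative only, not part of the library.

```py
from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam

class ImageResizeBlock(PipelineBlock):
    model_name = "resize-demo"  # illustrative name

    @property
    def description(self):
        return "Resizes the input image and stores it as an intermediate output."

    @property
    def inputs(self):
        return [InputParam(name="image", required=True, description="raw input image")]

    @property
    def intermediate_outputs(self):
        return [OutputParam(name="resized_image", description="image resized to 512x512")]

    def __call__(self, components, state):
        block_state = self.get_block_state(state)                         # 1. local view of the state
        block_state.resized_image = block_state.image.resize((512, 512))  # 2. computation logic
        self.set_block_state(state, block_state)                          # 3. push changes back
        return components, state                                          # 4. hand off to the next block
```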
From a65345010ebd215492e09c1ed2bd3eac15dc9bfa Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 12 Aug 2025 12:20:11 +0530 Subject: [PATCH 13/15] update --- docs/source/en/_toctree.yml | 2 +- .../api/modular_diffusers/pipeline_blocks.md | 6 +-- .../modular_diffusers/auto_pipeline_blocks.md | 50 +++++++------------ .../loop_sequential_pipeline_blocks.md | 14 +++--- .../modular_diffusers_states.md | 30 +++++------ .../en/modular_diffusers/modular_pipeline.md | 18 +++---- docs/source/en/modular_diffusers/overview.md | 2 +- .../en/modular_diffusers/pipeline_block.md | 16 +++--- .../source/en/modular_diffusers/quickstart.md | 40 ++++++--------- .../sequential_pipeline_blocks.md | 12 ++--- src/diffusers/__init__.py | 8 +-- 11 files changed, 82 insertions(+), 116 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 68975d8029a8..b06d819dd3de 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -117,7 +117,7 @@ - local: modular_diffusers/modular_diffusers_states title: States - local: modular_diffusers/pipeline_block - title: PipelineBlock + title: ModularPipelineBlocks - local: modular_diffusers/sequential_pipeline_blocks title: SequentialPipelineBlocks - local: modular_diffusers/loop_sequential_pipeline_blocks diff --git a/docs/source/en/api/modular_diffusers/pipeline_blocks.md b/docs/source/en/api/modular_diffusers/pipeline_blocks.md index fd454fb68c45..8ad581e679ac 100644 --- a/docs/source/en/api/modular_diffusers/pipeline_blocks.md +++ b/docs/source/en/api/modular_diffusers/pipeline_blocks.md @@ -4,10 +4,6 @@ [[autodoc]] diffusers.modular_pipelines.modular_pipeline.ModularPipelineBlocks -## PipelineBlock - -[[autodoc]] diffusers.modular_pipelines.modular_pipeline.PipelineBlock - ## SequentialPipelineBlocks [[autodoc]] diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks @@ -18,4 +14,4 @@ ## AutoPipelineBlocks -[[autodoc]] diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks \ No newline at end of file +[[autodoc]] diffusers.modular_pipelines.modular_pipeline.AutoPipelineBlocks \ No newline at end of file diff --git a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md index 970e389dfac7..2d4d82c735bd 100644 --- a/docs/source/en/modular_diffusers/auto_pipeline_blocks.md +++ b/docs/source/en/modular_diffusers/auto_pipeline_blocks.md @@ -16,34 +16,30 @@ specific language governing permissions and limitations under the License. This guide shows how to create [`~modular_pipelines.AutoPipelineBlocks`]. -Create three [`~modular_pipelines.PipelineBlock`] for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline. +Create three [`~modular_pipelines.ModularPipelineBlocks`] for text-to-image, image-to-image, and inpainting. These represent the different workflows available in the pipeline. ```py import torch -from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam +from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam -class TextToImageBlock(PipelineBlock): +class TextToImageBlock(ModularPipelineBlocks): model_name = "text2img" - + @property def inputs(self): return [InputParam(name="prompt")] - - @property - def intermediate_inputs(self): - return [] - + @property def intermediate_outputs(self): return [] - + @property def description(self): return "I'm a text-to-image workflow!" 
- + def __call__(self, components, state): block_state = self.get_block_state(state) print("running the text-to-image workflow") @@ -58,25 +54,21 @@ class TextToImageBlock(PipelineBlock): ```py -class ImageToImageBlock(PipelineBlock): +class ImageToImageBlock(ModularPipelineBlocks): model_name = "img2img" - + @property def inputs(self): return [InputParam(name="prompt"), InputParam(name="image")] - - @property - def intermediate_inputs(self): - return [] - + @property def intermediate_outputs(self): return [] - + @property def description(self): return "I'm an image-to-image workflow!" - + def __call__(self, components, state): block_state = self.get_block_state(state) print("running the image-to-image workflow") @@ -91,25 +83,21 @@ class ImageToImageBlock(PipelineBlock): ```py -class InpaintBlock(PipelineBlock): +class InpaintBlock(ModularPipelineBlocks): model_name = "inpaint" - + @property def inputs(self): return [InputParam(name="prompt"), InputParam(name="image"), InputParam(name="mask")] - - @property - def intermediate_inputs(self): - return [] - + @property def intermediate_outputs(self): return [] - + @property def description(self): return "I'm an inpaint workflow!" - + def __call__(self, components, state): block_state = self.get_block_state(state) print("running the inpaint workflow") @@ -129,7 +117,7 @@ You also need to include `block_trigger_inputs`, a list of input names that trig Lastly, it is important to include a `description` that clearly explains which inputs trigger which workflow. This helps users understand how to run specific workflows. ```py -from diffusers.modular_pipelines import AutoPipelineBlocks +from diffusers.modular_pipelines import AutoPipelineBlocks class AutoImageBlocks(AutoPipelineBlocks): # List of sub-block classes to choose from @@ -138,7 +126,7 @@ class AutoImageBlocks(AutoPipelineBlocks): block_names = ["inpaint", "img2img", "text2img"] # Trigger inputs that determine which block to run # - "mask" triggers inpaint workflow - # - "image" triggers img2img workflow (but only if mask is not provided) + # - "image" triggers img2img workflow (but only if mask is not provided) # - if none of above, runs the text2img workflow (default) block_trigger_inputs = ["mask", "image", None] # Description is extremely important for AutoPipelineBlocks diff --git a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md index 250a7cd23656..86c82b5145d3 100644 --- a/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md +++ b/docs/source/en/modular_diffusers/loop_sequential_pipeline_blocks.md @@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License. # LoopSequentialPipelineBlocks -[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlock`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop which is iterative by default. +[`~modular_pipelines.LoopSequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a loop. Data flows circularly, using `intermediate_inputs` and `intermediate_outputs`, and each block is run iteratively. This is typically used to create a denoising loop which is iterative by default. 
This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
 
@@ -20,14 +20,14 @@ This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBl
 
 ## Loop wrapper
 
 [`~modular_pipelines.LoopSequentialPipelineBlocks`] is also known as the *loop wrapper* because it defines the loop structure, iteration variables, and configuration. Within the loop wrapper, you need the following variables.
 
-- `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.PipelineBlock.inputs`].
-- `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.PipelineBlock.intermediate_inputs`].
-- `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. It is equivalent to [`~modular_pipelines.PipelineBlock.intermediate_outputs`].
+- `loop_inputs` are user provided values and equivalent to [`~modular_pipelines.ModularPipelineBlocks.inputs`].
+- `loop_intermediate_inputs` are intermediate variables from the [`~modular_pipelines.PipelineState`] and equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_inputs`].
+- `loop_intermediate_outputs` are new intermediate variables created by the block and added to the [`~modular_pipelines.PipelineState`]. It is equivalent to [`~modular_pipelines.ModularPipelineBlocks.intermediate_outputs`].
 - `__call__` method defines the loop structure and iteration logic.
 
 ```py
 import torch
-from diffusers.modular_pipelines import LoopSequentialPipelineBlocks, PipelineBlock, InputParam, OutputParam
+from diffusers.modular_pipelines import LoopSequentialPipelineBlocks, ModularPipelineBlocks, InputParam, OutputParam
 
 class LoopWrapper(LoopSequentialPipelineBlocks):
     model_name = "test"
@@ -52,7 +52,7 @@ The loop wrapper can pass additional arguments, like current iteration index, to
 
 ## Loop blocks
 
-A loop block is a [`~modular_pipelines.PipelineBlock`], but the `__call__` method behaves differently.
+A loop block is a [`~modular_pipelines.ModularPipelineBlocks`], but the `__call__` method behaves differently.
 
 - It receives the iteration variable from the loop wrapper.
 - It works directly with the [`~modular_pipelines.BlockState`] instead of the [`~modular_pipelines.PipelineState`].
@@ -61,7 +61,7 @@ A loop block is a [`~modular_pipelines.PipelineBlocks`], but the `__call__` metho
 
 Loop blocks share the same [`~modular_pipelines.BlockState`] to allow values to accumulate and change for each iteration in the loop.
 
 ```py
-class LoopBlock(PipelineBlock):
+class LoopBlock(ModularPipelineBlocks):
     model_name = "test"
     @property
     def inputs(self):
diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md
index 093a4b53b7b2..0a64fc9088fd 100644
--- a/docs/source/en/modular_diffusers/modular_diffusers_states.md
+++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md
@@ -17,7 +17,7 @@ Blocks rely on the [`~modular_pipelines.PipelineState`] and [`~modular_pipelines
 | State | Description |
 |-------|-------------|
 | [`~modular_pipelines.PipelineState`] | Maintains the overall data required for a pipeline's execution and allows blocks to read and update its data.
|
-| [`~modular_pipelines.BlockState`] | Allows each block to perform its computation with the necessary data from `inputs` and `intermediate_inputs` |
+| [`~modular_pipelines.BlockState`] | Allows each block to perform its computation with the necessary data from `inputs` |
 
 This guide explains how states work and how they connect blocks.
 
@@ -27,17 +27,15 @@ The [`~modular_pipelines.PipelineState`] is a global state container for all blo
 
 There are two dict's in [`~modular_pipelines.PipelineState`] for structuring data.
 
-- The `inputs` dict is an **immutable** state containing a copy of user provided values. A value added to `inputs` cannot be changed. Blocks can read from `inputs` but cannot write to it.
-- The `intermediates` dict is a **mutable** state containing variables that are passed between blocks and can be modified by them.
+- The `values` dict is an **mutable** state containing a copy of user provided input values and intermediate output values generated by blocks. Blocks can read and write to their `inputs`. If a block modifies an `input`, it will be reflected in the `values` dict after calling `set_block_state`.
+- The `kwargs_type`
 
 ```py
 PipelineState(
-    inputs={
+    values={
         'prompt': 'a cat'
         'guidance_scale': 7.0
         'num_inference_steps': 25
-    },
-    intermediates={
         'prompt_embeds': Tensor(dtype=torch.float32, shape=torch.Size([1, 1, 1, 1]))
         'negative_prompt_embeds': None
     },
@@ -46,9 +44,9 @@ PipelineState(
 
 ## BlockState
 
-The [`~modular_pipelines.BlockState`] is a local view of the relevant variables, `inputs` and `intermediate_inputs`, an individual block needs from [`~modular_pipelines.PipelineState`] for performing it's computations.
+The [`~modular_pipelines.BlockState`] is a local view of the relevant variables an individual block needs from [`~modular_pipelines.PipelineState`] for performing its computations.
 
 Access these variables directly as attributes like `block_state.image`.
 
@@ -62,9 +60,9 @@ When a block's `__call__` method is executed, it retrieves the [`BlockState`] wi
 def __call__(self, components, state):
     # retrieve BlockState
     block_state = self.get_block_state(state)
-    
-    # computation logic on inputs and intermediate_inputs
-    
+
+    # computation logic on inputs
+
     # update PipelineState
     self.set_block_state(state, block_state)
     return components, state
@@ -72,13 +70,9 @@ def __call__(self, components, state):
 
 ## State interaction
 
-[`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] interaction is defined by a block's `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+[`~modular_pipelines.PipelineState`] and [`~modular_pipelines.BlockState`] interaction is defined by a block's `inputs`, and `intermediate_outputs`.
 
-- `inputs`, a block can modify an input - like `block_state.image` - but the change is local to the [`~modular_pipelines.BlockState`] and won't affect the original input in [`~modular_pipelines.PipelineState`].
+- `inputs`, a block can modify an input - like `block_state.image` - and this change can be propagated globally to [`~modular_pipelines.PipelineState`] by calling `set_block_state`.
 
-- `intermediate_inputs`, is often values created from a previous block. When a block modifies `intermediate_inputs` - like `batch_size` - this change is reflected in both the [`~modular_pipelines.BlockState`] and [`~modular_pipelines.PipelineState`]. Any subsequent blocks are also affected.
-
-  If a previous block doesn't provide an `intermediate_inputs`, then the pipeline makes it available as a user input. However, the value is still a mutable intermediate state.
-
-- `intermediate_outputs`, is a new variable that a block creates from `intermediate_inputs`.
It is added to the [`~modular_pipelines.PipelineState`]'s `intermediates` dict and available as an `intermediate_inputs` for subsequent blocks or accessed by users as a final output from the pipeline.
+- `intermediate_outputs`, is a new variable that a block creates. It is added to the [`~modular_pipelines.PipelineState`]'s `values` dict and is available for subsequent blocks or accessed by users as a final output from the pipeline.
 
 If a variable is modified in `block_state` but not declared as an `intermediate_outputs`, it won't be added to [`~modular_pipelines.PipelineState`]. \ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/modular_pipeline.md b/docs/source/en/modular_diffusers/modular_pipeline.md
index 56225490c0c3..5bdef66a70de 100644
--- a/docs/source/en/modular_diffusers/modular_pipeline.md
+++ b/docs/source/en/modular_diffusers/modular_pipeline.md
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
 
 # ModularPipeline
 
-[`ModularPipeline`] converts [`~modular_pipelines.PipelineBlock`]'s into an executable pipeline that loads models and performs the computation steps defined in the block. It is the main interface for running a pipeline and it is very similar to the [`DiffusionPipeline`] API.
+[`ModularPipeline`] converts [`~modular_pipelines.ModularPipelineBlocks`] into an executable pipeline that loads models and performs the computation steps defined in the block. It is the main interface for running a pipeline and it is very similar to the [`DiffusionPipeline`] API.
 
 The main difference is to include an expected `output` argument in the pipeline.
 
@@ -125,7 +125,7 @@ t2i_blocks.sub_blocks["prepare_latents"] = CustomPrepareLatents()
 
 ## Creating a pipeline
 
-There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeline from [`PipelineBlocks`] or load an existing pipeline with [`~ModularPipeline.from_pretrained`].
+There are two ways to create a [`ModularPipeline`]. Assemble and create a pipeline from [`ModularPipelineBlocks`] or load an existing pipeline with [`~ModularPipeline.from_pretrained`].
 
 You should also initialize a [`ComponentsManager`] to handle device placement and memory and component management.
 
 > [!TIP]
 > Refer to the [ComponentsManager](./components_manager) doc for more details about how it can help manage components across different workflows.
 
- 
+ 
 
 Use the [`~ModularPipelineBlocks.init_pipeline`] method to create a [`ModularPipeline`] from the component and configuration specifications. This method loads the *specifications* from a `modular_model_index.json` file, but it doesn't load the *models* yet.
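As a rough sketch of that flow — assuming `t2i_blocks` is an assembled [`~modular_pipelines.SequentialPipelineBlocks`] and reusing the example modular repository from this guide — initialization and loading are two separate steps:

```py
import torch

# Create the pipeline from the blocks; this reads modular_model_index.json
# for component specifications but does not download or load any models.
t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-demo-auto")

# Only now are the model weights actually loaded.
t2i_pipeline.load_default_components(torch_dtype=torch.float16)
```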
@@ -228,7 +228,7 @@ To modify where components are loaded from, edit the `modular_model_index.json`
        null,
        null,
        {
          "repo": "RunDiffusion/Juggernaut-XL-v9",
-         "subfolder": "unet", 
+         "subfolder": "unet",
          "variant": "fp16"
        }
      ]
@@ -311,11 +311,11 @@ The [`~ModularPipeline.get_component_spec`] method gets a copy of the current co
 unet_spec = t2i_pipeline.get_component_spec("unet")
 unet_spec
 ComponentSpec(
-    name='unet', 
-    type_hint=, 
-    repo='RunDiffusion/Juggernaut-XL-v9', 
-    subfolder='unet', 
-    variant='fp16', 
+    name='unet',
+    type_hint=,
+    repo='RunDiffusion/Juggernaut-XL-v9',
+    subfolder='unet',
+    variant='fp16',
     default_creation_method='from_pretrained'
 )
diff --git a/docs/source/en/modular_diffusers/overview.md b/docs/source/en/modular_diffusers/overview.md
index d3b9a7f315aa..7d07c4b73434 100644
--- a/docs/source/en/modular_diffusers/overview.md
+++ b/docs/source/en/modular_diffusers/overview.md
@@ -29,7 +29,7 @@ The Modular Diffusers docs are organized as shown below.
 ## ModularPipelineBlocks
 
 - [States](./modular_diffusers_states) explains how data is shared and communicated between blocks and [`ModularPipeline`].
-- [PipelineBlock](./pipeline_block) is the most basic unit of a [`ModularPipeline`] and this guide shows you how to create one.
+- [ModularPipelineBlocks](./pipeline_block) is the most basic unit of a [`ModularPipeline`] and this guide shows you how to create one.
 - [SequentialPipelineBlocks](./sequential_pipeline_blocks) is a type of block that chains multiple blocks so they run one after another, passing data along the chain. This guide shows you how to create [`~modular_pipelines.SequentialPipelineBlocks`] and how they connect and work together.
 - [LoopSequentialPipelineBlocks](./loop_sequential_pipeline_blocks) is a type of block that runs a series of blocks in a loop. This guide shows you how to create [`~modular_pipelines.LoopSequentialPipelineBlocks`].
 - [AutoPipelineBlocks](./auto_pipeline_blocks) is a type of block that automatically chooses which blocks to run based on the input. This guide shows you how to create [`~modular_pipelines.AutoPipelineBlocks`].
diff --git a/docs/source/en/modular_diffusers/pipeline_block.md b/docs/source/en/modular_diffusers/pipeline_block.md
index a96613bf20e1..66d26b021456 100644
--- a/docs/source/en/modular_diffusers/pipeline_block.md
+++ b/docs/source/en/modular_diffusers/pipeline_block.md
@@ -10,22 +10,22 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->
 
-# PipelineBlock
+# ModularPipelineBlocks
 
-[`~modular_pipelines.PipelineBlock`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`~modular_pipelines.PipelineBlock`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
+[`~modular_pipelines.ModularPipelineBlocks`] is the basic block for building a [`ModularPipeline`]. It defines what components, inputs/outputs, and computation a block should perform for a specific step in a pipeline. A [`~modular_pipelines.ModularPipelineBlocks`] connects with other blocks, using [state](./modular_diffusers_states), to enable the modular construction of workflows.
 
-A [`~modular_pipelines.PipelineBlock`] on it's own can't be executed. It is a blueprint for what a step should do in a pipeline. To actually run and execute a pipeline, the [`~modular_pipelines.PipelineBlock`] needs to be converted into a [`ModularPipeline`].
+A [`~modular_pipelines.ModularPipelineBlocks`] on its own can't be executed. It is a blueprint for what a step should do in a pipeline.
To actually run and execute a pipeline, the [`~modular_pipelines.ModularPipelineBlocks`] needs to be converted into a [`ModularPipeline`].
 
-This guide will show you how to create a [`~modular_pipelines.PipelineBlock`].
+This guide will show you how to create a [`~modular_pipelines.ModularPipelineBlocks`].
 
 ## Inputs and outputs
 
 > [!TIP]
 > Refer to the [States](./modular_diffusers_states) guide if you aren't familiar with how state works in Modular Diffusers.
 
-A [`~modular_pipelines.PipelineBlock`] requires `inputs`, `intermediate_inputs`, and `intermediate_outputs`.
+A [`~modular_pipelines.ModularPipelineBlocks`] requires `inputs` and `intermediate_outputs`.
 
-- `inputs` are values provided by a user and retrieved from the [`~modular_pipelines.PipelineState`], which means `inputs` can't be modified. This is useful because some workflows resize an image, but the original image is still required. The [`~modular_pipelines.PipelineState`] maintains the original image.
+- `inputs` are values provided by a user and retrieved from the [`~modular_pipelines.PipelineState`]. This is useful because some workflows resize an image, but the original image is still required. The [`~modular_pipelines.PipelineState`] maintains the original image.
 
 Use `InputParam` to define `inputs`.
 
@@ -74,11 +74,11 @@ The computation a block performs is defined in the `__call__` method and it foll
 def __call__(self, components, state):
     # Get a local view of the state variables this block needs
     block_state = self.get_block_state(state)
-    
+
     # Your computation logic here
     # block_state contains all your inputs and intermediate_inputs
     # Access them like: block_state.image, block_state.processed_image
-    
+
     # Update the pipeline state with your updated block_states
     self.set_block_state(state, block_state)
     return components, state
diff --git a/docs/source/en/modular_diffusers/quickstart.md b/docs/source/en/modular_diffusers/quickstart.md
index 664699680859..9898c103f7cd 100644
--- a/docs/source/en/modular_diffusers/quickstart.md
+++ b/docs/source/en/modular_diffusers/quickstart.md
@@ -20,7 +20,7 @@ This doc will show you how to implement a [Differential Diffusion](https://diffe
 
 [`ModularPipelineBlocks`] are *definitions* that specify the components, inputs, outputs, and computation logic for a single step in a pipeline. There are four types of blocks.
 
-- [`PipelineBlock`] is the most basic block for a single step.
+- [`ModularPipelineBlocks`] is the most basic block for a single step.
 - [`SequentialPipelineBlocks`] is a multi-block that composes other blocks linearly. The outputs of one block are the inputs to the next block.
 - [`LoopSequentialPipelineBlocks`] is a multi-block that runs iteratively and is designed for iterative workflows.
 - [`AutoPipelineBlocks`] is a collection of blocks for different workflows and it selects which block to run based on the input. It is designed to conveniently package multiple workflows into a single pipeline.
 
@@ -45,14 +45,14 @@ IMAGE2IMAGE_BLOCKS = InsertableDict([
 
 Modular Diffusers uses *state* to communicate data between blocks. There are two types of states.
 
-- [`PipelineState`] is a global state with `inputs` provided by the user and `intermediates` (inputs and outputs).
The `intermediates` are passed between blocks and can change whereas the `inputs` can't. +- [`PipelineState`] is a global state that can be used to track all inputs and outputs across all blocks. - [`BlockState`] is a local view of relevant variables from [`PipelineState`] for an individual block. ## Customizing blocks [Differential Diffusion](https://differential-diffusion.github.io/) differs from standard image-to-image in its `prepare_latents` and `denoise` blocks. All the other blocks can be reused, but you'll need to modify these two. -Create placeholder `PipelineBlocks` for `prepare_latents` and `denoise` by copying and modifying the existing ones. +Create placeholder `ModularPipelineBlocks` for `prepare_latents` and `denoise` by copying and modifying the existing ones. Print the `denoise` block to see that it is composed of [`LoopSequentialPipelineBlocks`] with three sub-blocks, `before_denoiser`, `denoiser`, and `after_denoiser`. Only the `before_denoiser` sub-block needs to be modified to prepare the latent input for the denoiser based on the change map. @@ -65,7 +65,7 @@ Replace the `StableDiffusionXLLoopBeforeDenoiser` sub-block with the new `SDXLDi ```py # Copy existing blocks as placeholders -class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): +class SDXLDiffDiffPrepareLatentsStep(ModularPipelineBlocks): """Copied from StableDiffusionXLImg2ImgPrepareLatentsStep - will modify later""" # ... same implementation as StableDiffusionXLImg2ImgPrepareLatentsStep @@ -79,12 +79,11 @@ class SDXLDiffDiffDenoiseStep(StableDiffusionXLDenoiseLoopWrapper): The `prepare_latents` block requires the following changes. - a processor to process the change map -- a new `inputs` to accept the user-provided change map -- two new `intermediate_inputs`, `timestep` for precomputing all the latents and `num_inference_steps` to create the mask for updating the image regions +- a new `inputs` to accept the user-provided change map, `timestep` for precomputing all the latents and `num_inference_steps` to create the mask for updating the image regions - update the computation in the `__call__` method for processing the change map and creating the masks, and storing it in the [`BlockState`] ```diff -class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): +class SDXLDiffDiffPrepareLatentsStep(ModularPipelineBlocks): @property def expected_components(self) -> List[ComponentSpec]: return [ @@ -95,13 +94,8 @@ class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): @property def inputs(self) -> List[Tuple[str, Any]]: return [ -+ InputParam("diffdiff_map", required=True), - ] - - @property - def intermediate_inputs(self) -> List[InputParam]: - return [ InputParam("generator"), ++ InputParam("diffdiff_map", required=True), - InputParam("latent_timestep", required=True, type_hint=torch.Tensor), + InputParam("timesteps", type_hint=torch.Tensor), + InputParam("num_inference_steps", type_hint=int), @@ -126,28 +120,22 @@ class SDXLDiffDiffPrepareLatentsStep(PipelineBlock): The `before_denoiser` sub-block requires the following changes. 
-- a new `inputs` to accept a `denoising_start` parameter -- two new `intermediate_inputs` to accept the `original_latents` and `diffdiff_masks` from the `prepare_latents` block +- a new `inputs` to accept a `denoising_start` parameter, `original_latents` and `diffdiff_masks` from the `prepare_latents` block - update the computation in the `__call__` method for applying Differential Diffusion ```diff -class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): +class SDXLDiffDiffLoopBeforeDenoiser(ModularPipelineBlocks): @property def description(self) -> str: return ( "Step within the denoising loop for differential diffusion that prepare the latent input for the denoiser" ) -+ @property -+ def inputs(self) -> List[Tuple[str, Any]]: -+ return [ -+ InputParam("denoising_start"), -+ ] - @property - def intermediate_inputs(self) -> List[str]: + def inputs(self) -> List[str]: return [ InputParam("latents", required=True, type_hint=torch.Tensor), ++ InputParam("denoising_start"), + InputParam("original_latents", type_hint=torch.Tensor), + InputParam("diffdiff_masks", type_hint=torch.Tensor), ] @@ -159,7 +147,7 @@ class SDXLDiffDiffLoopBeforeDenoiser(PipelineBlock): + else: + block_state.mask = block_state.diffdiff_masks[i].unsqueeze(0).unsqueeze(1) + block_state.latents = block_state.original_latents[i] * block_state.mask + block_state.latents * (1 - block_state.mask) - + # ... rest of existing logic ... ``` @@ -232,7 +220,7 @@ dd_pipeline = dd_pipeline.to(device) ip_adapter_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_orange.jpeg") image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") -mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") prompt = "a green pear" negative_prompt = "blurry" @@ -284,7 +272,7 @@ dd_pipeline = dd_pipeline.to(device) control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg") image = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/20240329211129_4024911930.png?download=true") -mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") +mask = load_image("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/differential/gradient_mask.png?download=true") prompt = "a green pear" negative_prompt = "blurry" diff --git a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md index 1381377e21ea..6a18eaf20e89 100644 --- a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md +++ b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md @@ -12,19 +12,19 @@ specific language governing permissions and limitations under the License. # SequentialPipelineBlocks -[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.PipelineBlock`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. 
Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline. +[`~modular_pipelines.SequentialPipelineBlocks`] are a multi-block type that composes other [`~modular_pipelines.ModularPipelineBlocks`] together in a sequence. Data flows linearly from one block to the next using `intermediate_inputs` and `intermediate_outputs`. Each block in [`~modular_pipelines.SequentialPipelineBlocks`] usually represents a step in the pipeline, and by combining them, you gradually build a pipeline. This guide shows you how to connect two blocks into a [`~modular_pipelines.SequentialPipelineBlocks`]. -Create two [`~modular_pipelines.PipelineBlock`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`. +Create two [`~modular_pipelines.ModularPipelineBlocks`]. The first block, `InputBlock`, outputs a `batch_size` value and the second block, `ImageEncoderBlock` uses `batch_size` as `intermediate_inputs`. ```py -from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam +from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam -class InputBlock(PipelineBlock): +class InputBlock(ModularPipelineBlocks): @property def inputs(self): @@ -60,9 +60,9 @@ class InputBlock(PipelineBlock): ```py import torch -from diffusers.modular_pipelines import PipelineBlock, InputParam, OutputParam +from diffusers.modular_pipelines import ModularPipelineBlocks, InputParam, OutputParam -class ImageEncoderBlock(PipelineBlock): +class ImageEncoderBlock(ModularPipelineBlocks): @property def inputs(self): diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index c53a980262fc..5968f63c422a 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -36,6 +36,9 @@ "configuration_utils": ["ConfigMixin"], "guiders": [], "hooks": [], + "image_processor": [ + "VaeImageProcessor" + ], "loaders": ["FromOriginalModelMixin"], "models": [], "modular_pipelines": [], @@ -61,11 +64,8 @@ "is_unidecode_available", "logging", ], - "image_processor": [ - "VaeImageProcessor", - ], "video_processor": [ - "VideoProcessor", + "VideoProcessor" ], } From 793d57bbe402b4399402741433ed93df16283cd4 Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 12 Aug 2025 13:48:22 +0530 Subject: [PATCH 14/15] update --- .../en/modular_diffusers/modular_diffusers_states.md | 5 +---- .../en/modular_diffusers/sequential_pipeline_blocks.md | 10 +--------- src/diffusers/__init__.py | 8 ++------ 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md index 0a64fc9088fd..eb55b524e491 100644 --- a/docs/source/en/modular_diffusers/modular_diffusers_states.md +++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md @@ -27,8 +27,7 @@ The [`~modular_pipelines.PipelineState`] is a global state container for all blo There are two dict's in [`~modular_pipelines.PipelineState`] for structuring data. -- The `values` dict is an **mutable** state containing a copy of user provided input values and intermediate output values generated by blocks. Blocks can read and write to their `inputs`. If a block modifies an `input`, it will be reflected in the `values` dict after calling `set_block_state`. 
From 793d57bbe402b4399402741433ed93df16283cd4 Mon Sep 17 00:00:00 2001
From: DN6 
Date: Tue, 12 Aug 2025 13:48:22 +0530
Subject: [PATCH 14/15] update

---
 .../en/modular_diffusers/modular_diffusers_states.md   | 5 +----
 .../en/modular_diffusers/sequential_pipeline_blocks.md | 10 +---------
 src/diffusers/__init__.py                              | 8 ++------
 3 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/docs/source/en/modular_diffusers/modular_diffusers_states.md b/docs/source/en/modular_diffusers/modular_diffusers_states.md
index 0a64fc9088fd..eb55b524e491 100644
--- a/docs/source/en/modular_diffusers/modular_diffusers_states.md
+++ b/docs/source/en/modular_diffusers/modular_diffusers_states.md
@@ -27,8 +27,7 @@ The [`~modular_pipelines.PipelineState`] is a global state container for all blo

 There are two dicts in [`~modular_pipelines.PipelineState`] for structuring data.

-- The `values` dict is an **mutable** state containing a copy of user provided input values and intermediate output values generated by blocks. Blocks can read and write to their `inputs`. If a block modifies an `input`, it will be reflected in the `values` dict after calling `set_block_state`.
-- The `kwargs_type`
+- The `values` dict is a **mutable** state containing a copy of user-provided input values and intermediate output values generated by blocks. If a block modifies an `input`, it will be reflected in the `values` dict after calling `set_block_state`.

 ```py
 PipelineState(
@@ -74,5 +73,3 @@ def __call__(self, components, state):

 - `inputs`, a block can modify an input - like `block_state.image` - and this change can be propagated globally to [`~modular_pipelines.PipelineState`] by calling `set_block_state`.
 - `intermediate_outputs` is a new variable that a block creates. It is added to the [`~modular_pipelines.PipelineState`]'s `values` dict and is available for subsequent blocks or can be accessed by users as a final output from the pipeline.
-
-If a variable is modified in `block_state` but not declared as an `intermediate_outputs`, it won't be added to [`~modular_pipelines.PipelineState`].
\ No newline at end of file
diff --git a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
index 6a18eaf20e89..bbeb28aae5a4 100644
--- a/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
+++ b/docs/source/en/modular_diffusers/sequential_pipeline_blocks.md
@@ -33,10 +33,6 @@ class InputBlock(ModularPipelineBlocks):
             InputParam(name="num_images_per_prompt", type_hint=int, description="number of images per prompt"),
         ]

-    @property
-    def intermediate_inputs(self):
-        return []
-
     @property
     def intermediate_outputs(self):
         return [
@@ -68,11 +64,6 @@ class ImageEncoderBlock(ModularPipelineBlocks):
     def inputs(self):
         return [
             InputParam(name="image", type_hint="PIL.Image", description="raw input image to process"),
-        ]
-
-    @property
-    def intermediate_inputs(self):
-        return [
             InputParam(name="batch_size", type_hint=int),
         ]

@@ -89,6 +80,7 @@ class ImageEncoderBlock(ModularPipelineBlocks):
     def __call__(self, components, state):
         block_state = self.get_block_state(state)
         # Simulate processing the image
+        # This changes the image in the state from a PIL image to a tensor for all subsequent blocks
         block_state.image = torch.randn(1, 3, 512, 512)
         block_state.batch_size = block_state.batch_size * 2
         block_state.image_latents = torch.randn(1, 4, 64, 64)
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index c6ba589e4ac5..46094219e928 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -36,9 +36,7 @@
     "configuration_utils": ["ConfigMixin"],
     "guiders": [],
     "hooks": [],
-    "image_processor": [
-        "VaeImageProcessor"
-    ],
+    "image_processor": ["VaeImageProcessor"],
     "loaders": ["FromOriginalModelMixin"],
     "models": [],
     "modular_pipelines": [],
@@ -64,9 +62,7 @@
         "is_unidecode_available",
         "logging",
     ],
-    "video_processor": [
-        "VideoProcessor"
-    ],
+    "video_processor": ["VideoProcessor"],
 }

 try:

From 0c26848efcafe3c4110df730480f2b854214c4b8 Mon Sep 17 00:00:00 2001
From: DN6 
Date: Tue, 12 Aug 2025 14:21:09 +0530
Subject: [PATCH 15/15] update

---
 src/diffusers/__init__.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index 46094219e928..9e399f9d382b 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -36,7 +36,6 @@
     "configuration_utils": ["ConfigMixin"],
     "guiders": [],
     "hooks": [],
-    "image_processor": ["VaeImageProcessor"],
     "loaders": ["FromOriginalModelMixin"],
     "models": [],
     "modular_pipelines": [],
@@ -62,7 +61,6 @@
         "is_unidecode_available",
         "logging",
     ],
-    "video_processor": ["VideoProcessor"],
["VideoProcessor"], } try: @@ -907,12 +905,7 @@ WanVACETransformer3DModel, attention_backend, ) - from .modular_pipelines import ( - ComponentsManager, - ComponentSpec, - ModularPipeline, - ModularPipelineBlocks, - ) + from .modular_pipelines import ComponentsManager, ComponentSpec, ModularPipeline, ModularPipelineBlocks from .optimization import ( get_constant_schedule, get_constant_schedule_with_warmup,