Skip to content

Commit 8e53cd9

Browse files
naykun and yiyixuxu authored
Qwen-Image (#12055)
* (feat): qwen-image integration
* fix(qwen-image):
  - remove unused logics related to controlnet/ip-adapter
* fix(qwen-image):
  - compatible with attention dispatcher
  - cond cache support
* fix(qwen-image):
  - cond cache registry
  - attention backend argument
  - fix copies
* fix(qwen-image):
  - remove local test
* Update src/diffusers/models/transformers/transformer_qwenimage.py

---------

Co-authored-by: YiYi Xu <[email protected]>
1 parent 359b605 commit 8e53cd9

File tree

13 files changed: +2661 additions, -0 deletions (lines changed)

src/diffusers/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,7 @@
174174
"AutoencoderKLLTXVideo",
175175
"AutoencoderKLMagvit",
176176
"AutoencoderKLMochi",
177+
"AutoencoderKLQwenImage",
177178
"AutoencoderKLTemporalDecoder",
178179
"AutoencoderKLWan",
179180
"AutoencoderOobleck",
@@ -215,6 +216,7 @@
215216
"OmniGenTransformer2DModel",
216217
"PixArtTransformer2DModel",
217218
"PriorTransformer",
219+
"QwenImageTransformer2DModel",
218220
"SanaControlNetModel",
219221
"SanaTransformer2DModel",
220222
"SD3ControlNetModel",
@@ -486,6 +488,7 @@
486488
"PixArtAlphaPipeline",
487489
"PixArtSigmaPAGPipeline",
488490
"PixArtSigmaPipeline",
491+
"QwenImagePipeline",
489492
"ReduxImageEncoder",
490493
"SanaControlNetPipeline",
491494
"SanaPAGPipeline",
@@ -832,6 +835,7 @@
832835
AutoencoderKLLTXVideo,
833836
AutoencoderKLMagvit,
834837
AutoencoderKLMochi,
838+
AutoencoderKLQwenImage,
835839
AutoencoderKLTemporalDecoder,
836840
AutoencoderKLWan,
837841
AutoencoderOobleck,
@@ -873,6 +877,7 @@
873877
OmniGenTransformer2DModel,
874878
PixArtTransformer2DModel,
875879
PriorTransformer,
880+
QwenImageTransformer2DModel,
876881
SanaControlNetModel,
877882
SanaTransformer2DModel,
878883
SD3ControlNetModel,
@@ -1119,6 +1124,7 @@
11191124
PixArtAlphaPipeline,
11201125
PixArtSigmaPAGPipeline,
11211126
PixArtSigmaPipeline,
1127+
QwenImagePipeline,
11221128
ReduxImageEncoder,
11231129
SanaControlNetPipeline,
11241130
SanaPAGPipeline,

src/diffusers/hooks/_helpers.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@ def _register_transformer_blocks_metadata():
153153
)
154154
from ..models.transformers.transformer_ltx import LTXVideoTransformerBlock
155155
from ..models.transformers.transformer_mochi import MochiTransformerBlock
156+
from ..models.transformers.transformer_qwenimage import QwenImageTransformerBlock
156157
from ..models.transformers.transformer_wan import WanTransformerBlock
157158

158159
# BasicTransformerBlock
@@ -255,6 +256,15 @@ def _register_transformer_blocks_metadata():
255256
),
256257
)
257258

259+
# QwenImage
260+
TransformerBlockRegistry.register(
261+
model_class=QwenImageTransformerBlock,
262+
metadata=TransformerBlockMetadata(
263+
return_hidden_states_index=1,
264+
return_encoder_hidden_states_index=0,
265+
),
266+
)
267+
258268

259269
# fmt: off
260270
def _skip_attention___ret___hidden_states(self, *args, **kwargs):

src/diffusers/models/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
_import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
3939
_import_structure["autoencoders.autoencoder_kl_magvit"] = ["AutoencoderKLMagvit"]
4040
_import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"]
41+
_import_structure["autoencoders.autoencoder_kl_qwenimage"] = ["AutoencoderKLQwenImage"]
4142
_import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
4243
_import_structure["autoencoders.autoencoder_kl_wan"] = ["AutoencoderKLWan"]
4344
_import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
@@ -88,6 +89,7 @@
8889
_import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
8990
_import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
9091
_import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"]
92+
_import_structure["transformers.transformer_qwenimage"] = ["QwenImageTransformer2DModel"]
9193
_import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
9294
_import_structure["transformers.transformer_skyreels_v2"] = ["SkyReelsV2Transformer3DModel"]
9395
_import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
@@ -126,6 +128,7 @@
126128
AutoencoderKLLTXVideo,
127129
AutoencoderKLMagvit,
128130
AutoencoderKLMochi,
131+
AutoencoderKLQwenImage,
129132
AutoencoderKLTemporalDecoder,
130133
AutoencoderKLWan,
131134
AutoencoderOobleck,
@@ -177,6 +180,7 @@
177180
OmniGenTransformer2DModel,
178181
PixArtTransformer2DModel,
179182
PriorTransformer,
183+
QwenImageTransformer2DModel,
180184
SanaTransformer2DModel,
181185
SD3Transformer2DModel,
182186
SkyReelsV2Transformer3DModel,

src/diffusers/models/autoencoders/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from .autoencoder_kl_ltx import AutoencoderKLLTXVideo
99
from .autoencoder_kl_magvit import AutoencoderKLMagvit
1010
from .autoencoder_kl_mochi import AutoencoderKLMochi
11+
from .autoencoder_kl_qwenimage import AutoencoderKLQwenImage
1112
from .autoencoder_kl_temporal_decoder import AutoencoderKLTemporalDecoder
1213
from .autoencoder_kl_wan import AutoencoderKLWan
1314
from .autoencoder_oobleck import AutoencoderOobleck

0 commit comments

Comments
 (0)