Skip to content

Commit 362ef72

Browse files
committed
Code cleanup-2
Signed-off-by: Amit Raj <[email protected]>
1 parent e579c88 commit 362ef72

File tree

3 files changed

+50
-24
lines changed

3 files changed

+50
-24
lines changed

QEfficient/diffusers/pipelines/pipeline_utils.py

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,21 @@
1515
from QEfficient.base.onnx_transforms import FP16ClipTransform, SplitTensorsTransform
1616
from QEfficient.diffusers.models.pytorch_transforms import AttentionTransform, CustomOpsTransform
1717
from QEfficient.transformers.models.pytorch_transforms import (
18-
KVCacheExternalModuleMapperTransform,
19-
KVCacheTransform,
2018
T5ModelTransform,
2119
)
22-
from QEfficient.transformers.quantizers.quant_transforms import AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform
2320
from QEfficient.utils import constants
2421
from QEfficient.utils.cache import to_hashable
2522

2623

2724
class QEffTextEncoder(QEFFBaseModel):
2825
_pytorch_transforms = [CustomOpsTransform, T5ModelTransform]
2926
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
27+
"""
28+
QEffTextEncoder is a wrapper class for text encoder models that provides ONNX export and compilation capabilities.
29+
30+
This class extends QEFFBaseModel to handle text encoder models (like T5EncoderModel) with specific
31+
transformations and optimizations for efficient inference on Qualcomm AI hardware.
32+
"""
3033

3134
def __init__(self, model: nn.modules):
3235
super().__init__(model)
@@ -38,12 +41,16 @@ def get_onnx_config(self):
3841

3942
example_inputs = {
4043
"input_ids": torch.zeros((bs, seq_len), dtype=torch.int64),
41-
"output_hidden_states": True,
4244
}
4345

4446
dynamic_axes = {"input_ids": {0: "batch_size", 1: "seq_len"}}
4547

4648
output_names = ["pooler_output", "last_hidden_state"]
49+
if self.model.__class__.__name__ == "T5EncoderModel":
50+
output_names = ["last_hidden_state"]
51+
else:
52+
example_inputs["output_hidden_states"] = (True,)
53+
4754
return example_inputs, dynamic_axes, output_names
4855

4956
def export(self, inputs, output_names, dynamic_axes, export_dir=None):
@@ -106,15 +113,17 @@ def get_model_config(self) -> dict:
106113

107114

108115
class QEffUNet(QEFFBaseModel):
109-
_pytorch_transforms = [
110-
AwqToMatmulNbitsTransform,
111-
GPTQToMatmulNbitsTransform,
112-
CustomOpsTransform,
113-
KVCacheTransform,
114-
KVCacheExternalModuleMapperTransform,
115-
]
116+
_pytorch_transforms = [CustomOpsTransform]
116117
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
117118

119+
"""
120+
QEffUNet is a wrapper class for UNet models that provides ONNX export and compilation capabilities.
121+
122+
This class extends QEFFBaseModel to handle UNet models with specific transformations and optimizations
123+
for efficient inference on Qualcomm AI hardware. It is commonly used in diffusion models for image
124+
generation tasks.
125+
"""
126+
118127
def __init__(self, model: nn.modules):
119128
super().__init__(model.unet)
120129
self.model = model.unet
@@ -168,13 +177,17 @@ def get_model_config(self) -> dict:
168177

169178

170179
class QEffVAE(QEFFBaseModel):
171-
_pytorch_transforms = [
172-
AwqToMatmulNbitsTransform,
173-
GPTQToMatmulNbitsTransform,
174-
CustomOpsTransform,
175-
]
180+
_pytorch_transforms = [CustomOpsTransform]
176181
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
177182

183+
"""
184+
QEffVAE is a wrapper class for Variational Autoencoder (VAE) models that provides ONNX export and compilation capabilities.
185+
186+
This class extends QEFFBaseModel to handle VAE models with specific transformations and optimizations
187+
for efficient inference on Qualcomm AI hardware. VAE models are commonly used in diffusion pipelines
188+
for encoding images to latent space and decoding latent representations back to images.
189+
"""
190+
178191
def __init__(self, model: nn.modules, type: str):
179192
super().__init__(model.vae)
180193
self.model = copy.deepcopy(model.vae)
@@ -259,13 +272,17 @@ def get_model_config(self) -> dict:
259272

260273

261274
class QEffSafetyChecker(QEFFBaseModel):
262-
_pytorch_transforms = [
263-
AwqToMatmulNbitsTransform,
264-
GPTQToMatmulNbitsTransform,
265-
CustomOpsTransform,
266-
]
275+
_pytorch_transforms = [CustomOpsTransform]
267276
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
268277

278+
"""
279+
QEffSafetyChecker is a wrapper class for safety checker models that provides ONNX export and compilation capabilities.
280+
281+
This class extends QEFFBaseModel to handle safety checker models with specific transformations and optimizations
282+
for efficient inference on Qualcomm AI hardware. Safety checker models are commonly used in diffusion pipelines
283+
to filter out potentially harmful or inappropriate generated content.
284+
"""
285+
269286
def __init__(self, model: nn.modules):
270287
super().__init__(model.vae)
271288
self.model = model.safety_checker
@@ -322,6 +339,14 @@ class QEffSD3Transformer2DModel(QEFFBaseModel):
322339
_pytorch_transforms = [AttentionTransform, CustomOpsTransform]
323340
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
324341

342+
"""
343+
QEffSD3Transformer2DModel is a wrapper class for Stable Diffusion 3 Transformer2D models that provides ONNX export and compilation capabilities.
344+
345+
This class extends QEFFBaseModel to handle SD3 Transformer2D models with specific transformations and optimizations
346+
for efficient inference on Qualcomm AI hardware. It is designed for the newer Stable Diffusion 3 architecture
347+
that uses transformer-based diffusion models instead of traditional UNet architectures.
348+
"""
349+
325350
def __init__(self, model: nn.modules):
326351
super().__init__(model)
327352
self.model = model

QEfficient/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ def compile(
242242
)
243243

244244
self.text_encoder_compile_path = self.text_encoder._compile(
245+
onnx_path,
245246
compile_dir,
246247
compile_only=True,
247248
specializations=specializations_text_encoder,

examples/diffusers/stable_diffusion_3/stable_diffusion_35_example.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77

88
from QEfficient import QEFFStableDiffusion3Pipeline
99

10-
pipeline = QEFFStableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large")
10+
pipeline = QEFFStableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo")
1111
pipeline.compile(num_devices_text_encoder=1, num_devices_transformer=4, num_devices_vae_decoder=1)
1212

13-
# NOTE: guidance_scale <=1 is not supported
13+
# NOTE: guidance_scale <=1 is not supported
1414
image = pipeline("A girl laughing", num_inference_steps=28, guidance_scale=2.0).images[0]
15-
image.save("girl_laughing.png")
15+
image.save("girl_laughing_turbo.png")

0 commit comments

Comments (0)