15
15
from QEfficient .base .onnx_transforms import FP16ClipTransform , SplitTensorsTransform
16
16
from QEfficient .diffusers .models .pytorch_transforms import AttentionTransform , CustomOpsTransform
17
17
from QEfficient .transformers .models .pytorch_transforms import (
18
- KVCacheExternalModuleMapperTransform ,
19
- KVCacheTransform ,
20
18
T5ModelTransform ,
21
19
)
22
- from QEfficient .transformers .quantizers .quant_transforms import AwqToMatmulNbitsTransform , GPTQToMatmulNbitsTransform
23
20
from QEfficient .utils import constants
24
21
from QEfficient .utils .cache import to_hashable
25
22
26
23
27
24
class QEffTextEncoder (QEFFBaseModel ):
28
25
_pytorch_transforms = [CustomOpsTransform , T5ModelTransform ]
29
26
_onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
27
+ """
28
+ QEffTextEncoder is a wrapper class for text encoder models that provides ONNX export and compilation capabilities.
29
+
30
+ This class extends QEFFBaseModel to handle text encoder models (like T5EncoderModel) with specific
31
+ transformations and optimizations for efficient inference on Qualcomm AI hardware.
32
+ """
30
33
31
34
def __init__ (self , model : nn .modules ):
32
35
super ().__init__ (model )
@@ -38,12 +41,16 @@ def get_onnx_config(self):
38
41
39
42
example_inputs = {
40
43
"input_ids" : torch .zeros ((bs , seq_len ), dtype = torch .int64 ),
41
- "output_hidden_states" : True ,
42
44
}
43
45
44
46
dynamic_axes = {"input_ids" : {0 : "batch_size" , 1 : "seq_len" }}
45
47
46
48
output_names = ["pooler_output" , "last_hidden_state" ]
49
+ if self .model .__class__ .__name__ == "T5EncoderModel" :
50
+ output_names = ["last_hidden_state" ]
51
+ else :
52
+ example_inputs ["output_hidden_states" ] = (True ,)
53
+
47
54
return example_inputs , dynamic_axes , output_names
48
55
49
56
def export (self , inputs , output_names , dynamic_axes , export_dir = None ):
@@ -106,15 +113,17 @@ def get_model_config(self) -> dict:
106
113
107
114
108
115
class QEffUNet (QEFFBaseModel ):
109
- _pytorch_transforms = [
110
- AwqToMatmulNbitsTransform ,
111
- GPTQToMatmulNbitsTransform ,
112
- CustomOpsTransform ,
113
- KVCacheTransform ,
114
- KVCacheExternalModuleMapperTransform ,
115
- ]
116
+ _pytorch_transforms = [CustomOpsTransform ]
116
117
_onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
117
118
119
+ """
120
+ QEffUNet is a wrapper class for UNet models that provides ONNX export and compilation capabilities.
121
+
122
+ This class extends QEFFBaseModel to handle UNet models with specific transformations and optimizations
123
+ for efficient inference on Qualcomm AI hardware. It is commonly used in diffusion models for image
124
+ generation tasks.
125
+ """
126
+
118
127
def __init__ (self , model : nn .modules ):
119
128
super ().__init__ (model .unet )
120
129
self .model = model .unet
@@ -168,13 +177,17 @@ def get_model_config(self) -> dict:
168
177
169
178
170
179
class QEffVAE (QEFFBaseModel ):
171
- _pytorch_transforms = [
172
- AwqToMatmulNbitsTransform ,
173
- GPTQToMatmulNbitsTransform ,
174
- CustomOpsTransform ,
175
- ]
180
+ _pytorch_transforms = [CustomOpsTransform ]
176
181
_onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
177
182
183
+ """
184
+ QEffVAE is a wrapper class for Variational Autoencoder (VAE) models that provides ONNX export and compilation capabilities.
185
+
186
+ This class extends QEFFBaseModel to handle VAE models with specific transformations and optimizations
187
+ for efficient inference on Qualcomm AI hardware. VAE models are commonly used in diffusion pipelines
188
+ for encoding images to latent space and decoding latent representations back to images.
189
+ """
190
+
178
191
def __init__ (self , model : nn .modules , type : str ):
179
192
super ().__init__ (model .vae )
180
193
self .model = copy .deepcopy (model .vae )
@@ -259,13 +272,17 @@ def get_model_config(self) -> dict:
259
272
260
273
261
274
class QEffSafetyChecker (QEFFBaseModel ):
262
- _pytorch_transforms = [
263
- AwqToMatmulNbitsTransform ,
264
- GPTQToMatmulNbitsTransform ,
265
- CustomOpsTransform ,
266
- ]
275
+ _pytorch_transforms = [CustomOpsTransform ]
267
276
_onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
268
277
278
+ """
279
+ QEffSafetyChecker is a wrapper class for safety checker models that provides ONNX export and compilation capabilities.
280
+
281
+ This class extends QEFFBaseModel to handle safety checker models with specific transformations and optimizations
282
+ for efficient inference on Qualcomm AI hardware. Safety checker models are commonly used in diffusion pipelines
283
+ to filter out potentially harmful or inappropriate generated content.
284
+ """
285
+
269
286
def __init__ (self , model : nn .modules ):
270
287
super ().__init__ (model .vae )
271
288
self .model = model .safety_checker
@@ -322,6 +339,14 @@ class QEffSD3Transformer2DModel(QEFFBaseModel):
322
339
_pytorch_transforms = [AttentionTransform , CustomOpsTransform ]
323
340
_onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
324
341
342
+ """
343
+ QEffSD3Transformer2DModel is a wrapper class for Stable Diffusion 3 Transformer2D models that provides ONNX export and compilation capabilities.
344
+
345
+ This class extends QEFFBaseModel to handle SD3 Transformer2D models with specific transformations and optimizations
346
+ for efficient inference on Qualcomm AI hardware. It is designed for the newer Stable Diffusion 3 architecture
347
+ that uses transformer-based diffusion models instead of traditional UNet architectures.
348
+ """
349
+
325
350
def __init__ (self , model : nn .modules ):
326
351
super ().__init__ (model )
327
352
self .model = model
0 commit comments