|
136 | 136 | Qwen2Model,
|
137 | 137 | Qwen2RMSNorm,
|
138 | 138 | )
|
| 139 | +from transformers.models.qwen3_moe.modeling_qwen3_moe import ( |
| 140 | + Qwen3MoeAttention, |
| 141 | + Qwen3MoeDecoderLayer, |
| 142 | + Qwen3MoeForCausalLM, |
| 143 | + Qwen3MoeModel, |
| 144 | + Qwen3MoeRMSNorm, |
| 145 | + Qwen3MoeRotaryEmbedding, |
| 146 | + Qwen3MoeSparseMoeBlock, |
| 147 | +) |
139 | 148 | from transformers.models.starcoder2.modeling_starcoder2 import (
|
140 | 149 | Starcoder2Attention,
|
141 | 150 | Starcoder2DecoderLayer,
|
|
303 | 312 | QEffQwen2ForCausalLM,
|
304 | 313 | QEffQwen2Model,
|
305 | 314 | )
|
| 315 | +from QEfficient.transformers.models.qwen3_moe.modeling_qwen3_moe import ( |
| 316 | + QEffQwen3MoeAttention, |
| 317 | + QEffQwen3MoeDecoderLayer, |
| 318 | + QEffQwen3MoeForCausalLM, |
| 319 | + QEffQwen3MoeModel, |
| 320 | + QEffQwen3MoeRotaryEmbedding, |
| 321 | + QEffQwen3MoeSparseMoeBlock, |
| 322 | +) |
306 | 323 | from QEfficient.transformers.models.starcoder2.modeling_starcoder2 import (
|
307 | 324 | QEffStarcoder2Attention,
|
308 | 325 | QEFFStarcoder2DecoderLayer,
|
@@ -338,6 +355,7 @@ class CustomOpsTransform(ModuleMappingTransform):
|
338 | 355 | MllamaTextRMSNorm: CustomRMSNormAIC,
|
339 | 356 | GraniteRMSNorm: CustomRMSNormAIC,
|
340 | 357 | GraniteMoeRMSNorm: CustomRMSNormAIC,
|
| 358 | + Qwen3MoeRMSNorm: CustomRMSNormAIC, |
341 | 359 | Gemma3RMSNorm: QEffGemma3CustomRMSNormAIC,
|
342 | 360 | }
|
343 | 361 |
|
@@ -388,6 +406,13 @@ class KVCacheTransform(ModuleMappingTransform):
|
388 | 406 | GemmaDecoderLayer: QEffGemmaDecoderLayer,
|
389 | 407 | GemmaModel: QEffGemmaModel,
|
390 | 408 | GemmaForCausalLM: QEffGemmaForCausalLM,
|
| 409 | + # Qwen3Moe |
| 410 | + Qwen3MoeForCausalLM: QEffQwen3MoeForCausalLM, |
| 411 | + Qwen3MoeModel: QEffQwen3MoeModel, |
| 412 | + Qwen3MoeDecoderLayer: QEffQwen3MoeDecoderLayer, |
| 413 | + Qwen3MoeAttention: QEffQwen3MoeAttention, |
| 414 | + Qwen3MoeRotaryEmbedding: QEffQwen3MoeRotaryEmbedding, |
| 415 | + Qwen3MoeSparseMoeBlock: QEffQwen3MoeSparseMoeBlock, |
391 | 416 | # Gemma2
|
392 | 417 | Gemma2Attention: QEffGemma2Attention,
|
393 | 418 | Gemma2DecoderLayer: QEffGemma2DecoderLayer,
|
|
0 commit comments