Skip to content

Commit 4c68412

Browse files
authored
Onboarding Qwen3Moe (#406)
Onboarding Qwen3Moe

<img width="1031" height="407" alt="image" src="https://github.com/user-attachments/assets/095876e2-3fee-403f-8f84-16c41e062423" />

---------

Signed-off-by: Dipankar Sarkar <[email protected]>
Signed-off-by: Dipankar Sarkar <[email protected]>
1 parent b7775c9 commit 4c68412

File tree

7 files changed

+572
-0
lines changed

7 files changed

+572
-0
lines changed

QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,15 @@
136136
Qwen2Model,
137137
Qwen2RMSNorm,
138138
)
139+
from transformers.models.qwen3_moe.modeling_qwen3_moe import (
140+
Qwen3MoeAttention,
141+
Qwen3MoeDecoderLayer,
142+
Qwen3MoeForCausalLM,
143+
Qwen3MoeModel,
144+
Qwen3MoeRMSNorm,
145+
Qwen3MoeRotaryEmbedding,
146+
Qwen3MoeSparseMoeBlock,
147+
)
139148
from transformers.models.starcoder2.modeling_starcoder2 import (
140149
Starcoder2Attention,
141150
Starcoder2DecoderLayer,
@@ -303,6 +312,14 @@
303312
QEffQwen2ForCausalLM,
304313
QEffQwen2Model,
305314
)
315+
from QEfficient.transformers.models.qwen3_moe.modeling_qwen3_moe import (
316+
QEffQwen3MoeAttention,
317+
QEffQwen3MoeDecoderLayer,
318+
QEffQwen3MoeForCausalLM,
319+
QEffQwen3MoeModel,
320+
QEffQwen3MoeRotaryEmbedding,
321+
QEffQwen3MoeSparseMoeBlock,
322+
)
306323
from QEfficient.transformers.models.starcoder2.modeling_starcoder2 import (
307324
QEffStarcoder2Attention,
308325
QEFFStarcoder2DecoderLayer,
@@ -338,6 +355,7 @@ class CustomOpsTransform(ModuleMappingTransform):
338355
MllamaTextRMSNorm: CustomRMSNormAIC,
339356
GraniteRMSNorm: CustomRMSNormAIC,
340357
GraniteMoeRMSNorm: CustomRMSNormAIC,
358+
Qwen3MoeRMSNorm: CustomRMSNormAIC,
341359
Gemma3RMSNorm: QEffGemma3CustomRMSNormAIC,
342360
}
343361

@@ -388,6 +406,13 @@ class KVCacheTransform(ModuleMappingTransform):
388406
GemmaDecoderLayer: QEffGemmaDecoderLayer,
389407
GemmaModel: QEffGemmaModel,
390408
GemmaForCausalLM: QEffGemmaForCausalLM,
409+
# Qwen3Moe
410+
Qwen3MoeForCausalLM: QEffQwen3MoeForCausalLM,
411+
Qwen3MoeModel: QEffQwen3MoeModel,
412+
Qwen3MoeDecoderLayer: QEffQwen3MoeDecoderLayer,
413+
Qwen3MoeAttention: QEffQwen3MoeAttention,
414+
Qwen3MoeRotaryEmbedding: QEffQwen3MoeRotaryEmbedding,
415+
Qwen3MoeSparseMoeBlock: QEffQwen3MoeSparseMoeBlock,
391416
# Gemma2
392417
Gemma2Attention: QEffGemma2Attention,
393418
Gemma2DecoderLayer: QEffGemma2DecoderLayer,
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -----------------------------------------------------------------------------
2+
#
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
#
6+
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)